diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index f8e715d..df8e7ad 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1209,7 +1209,14 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal "joins unnecessary memory will be allocated and then trimmed."), HIVEHYBRIDGRACEHASHJOINBLOOMFILTER("hive.mapjoin.hybridgrace.bloomfilter", true, "Whether to " + "use BloomFilter in Hybrid grace hash join to minimize unnecessary spilling."), - + HIVEMAPJOINFULLOUTER("hive.mapjoin.full.outer", true, + "Whether to use MapJoin for FULL OUTER JOINs."), + HIVE_TEST_MAPJOINFULLOUTER_OVERRIDE("hive.test.mapjoin.full.outer.override", false, + "internal use only, used to override the hive.mapjoin.full.outer setting. " + + "The default is false.", + true), + HIVEMAPJOINFULLOUTERHYBRIDGRACE("hive.mapjoin.full.outer.hybridgrace", true, + "Whether to use hybrid grace hash join for FULL OUTER MapJoin. Tez only."), HIVESMBJOINCACHEROWS("hive.smbjoin.cache.rows", 10000, "How many rows with the same key value should be cached in memory per smb joined table."), HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000, diff --git data/files/fullouter_long_big_1a.txt data/files/fullouter_long_big_1a.txt new file mode 100644 index 0000000..8cf831f --- /dev/null +++ data/files/fullouter_long_big_1a.txt @@ -0,0 +1,11 @@ +-5310365297525168078 +-6187919478609154811 +968819023021777205 +3313583664488247651 +-5206670856103795573 +\N +-6187919478609154811 +1569543799237464101 +-6187919478609154811 +-8460550397108077433 +-6187919478609154811 diff --git data/files/fullouter_long_big_1a_nonull.txt data/files/fullouter_long_big_1a_nonull.txt new file mode 100644 index 0000000..b2325ad --- /dev/null +++ data/files/fullouter_long_big_1a_nonull.txt @@ -0,0 +1,10 @@ +1569543799237464101 +-6187919478609154811 +968819023021777205 +-8460550397108077433 +-6187919478609154811 +-5310365297525168078 +-6187919478609154811 +-5206670856103795573 +3313583664488247651 +-6187919478609154811 diff --git data/files/fullouter_long_big_1b.txt data/files/fullouter_long_big_1b.txt new file mode 100644 index 0000000..87c2b3c --- /dev/null +++ data/files/fullouter_long_big_1b.txt @@ -0,0 +1,13 @@ +\N +31713 +31713 +31713 +31713 +32030 +31713 +-25394 +31713 +31713 +31713 +31713 +31713 diff --git data/files/fullouter_long_big_1c.txt data/files/fullouter_long_big_1c.txt new file mode 100644 index 0000000..2d13c26 --- /dev/null +++ data/files/fullouter_long_big_1c.txt @@ -0,0 +1,11 @@ +1928928239,\N +-1437463633,YYXPPCH +-1437463633,TKTKGVGFW +1725068083,MKSCCE +1928928239,\N +\N,ABBZ +1928928239,AMKTIWQ +-1437463633,JU +1928928239,VAQHVRI +-1437463633,SOWDWMS +-1437463633,\N diff --git data/files/fullouter_long_big_1d.txt data/files/fullouter_long_big_1d.txt new file mode 100644 index 0000000..4137f67 --- /dev/null +++ data/files/fullouter_long_big_1d.txt @@ -0,0 +1,12 @@ +-702028721 +-702028721 +-1780951928 +-670834064 +-814597051 +\N +-814597051 +-814597051 +-702028721 +-2038654700 +\N +-814597051 diff --git data/files/fullouter_long_small_1a.txt data/files/fullouter_long_small_1a.txt new file mode 100644 index 0000000..45d5825 --- /dev/null +++ data/files/fullouter_long_small_1a.txt @@ -0,0 +1,54 @@ +-1339636982994067311,2000-06-20 +-2575185053386712613,2105-01-21 +\N,2098-02-10 +-6784441713807772877,1845-02-16 +\N,2024-01-23 +-4224290881682877258,2185-07-08
+-614848861623872247,2101-05-25 +-2098090254092150988,2163-05-26 +434940853096155515,2275-02-08 +3873405809071478736,2034-06-09 +-2184423060953067642,1880-10-06 +7297177530102477725,1921-05-11 +7937120928560087303,2083-03-14 +\N,2242-02-08 +-2688622006344936758,2129-01-11 +214451696109242839,1977-01-04 +-4961171400048338491,2196-08-10 +4436884039838843341,2031-05-23 +2438535236662373438,1916-01-10 +6049335087268933751,2282-06-09 +8755921538765428593,1827-05-01 +5252407779338300447,2039-03-10 +-2184423060953067642,1853-07-06 +7297177530102477725,1926-04-12 +-2098090254092150988,1817-03-12 +-5754527700632192146,1958-07-15 +-614848861623872247,2112-11-09 +5246983111579595707,1817-07-01 +-2098090254092150988,2219-12-23 +-5706981533666803767,2151-06-09 +7297177530102477725,2125-08-26 +-7707546703881534780,2134-08-20 +214451696109242839,2179-04-18 +3845554233155411208,1805-11-10 +3905351789241845882,2045-12-05 +2438535236662373438,2026-06-23 +-2688622006344936758,1948-10-15 +6049335087268933751,2086-12-17 +-2575185053386712613,1809-07-12 +-327698348664467755,2222-10-15 +-4224290881682877258,1813-05-17 +3873405809071478736,2164-04-23 +-5706981533666803767,1800-09-20 +214451696109242839,1855-05-12 +2438535236662373438,1881-09-16 +5252407779338300447,2042-04-26 +-3655445881497026796,2108-08-16 +3905351789241845882,1866-07-28 +-6784441713807772877,2054-06-17 +5246983111579595707,2260-05-11 +-1339636982994067311,2008-12-03 +3873405809071478736,1918-11-20 +-4224290881682877258,2120-01-16 +3845554233155411208,2264-04-05 diff --git data/files/fullouter_long_small_1a_nonull.txt data/files/fullouter_long_small_1a_nonull.txt new file mode 100644 index 0000000..bf94d5a --- /dev/null +++ data/files/fullouter_long_small_1a_nonull.txt @@ -0,0 +1,51 @@ +5246983111579595707,1817-07-01 +4436884039838843341,2031-05-23 +-4224290881682877258,1813-05-17 +-4961171400048338491,2196-08-10 +-2575185053386712613,2105-01-21 +5252407779338300447,2042-04-26 +-614848861623872247,2101-05-25 +-2098090254092150988,2163-05-26 +2438535236662373438,1881-09-16 +214451696109242839,2179-04-18 +2438535236662373438,2026-06-23 +-2184423060953067642,1853-07-06 +3873405809071478736,2164-04-23 +214451696109242839,1855-05-12 +-6784441713807772877,1845-02-16 +-2688622006344936758,1948-10-15 +7297177530102477725,1921-05-11 +-2575185053386712613,1809-07-12 +3905351789241845882,2045-12-05 +3845554233155411208,1805-11-10 +-3655445881497026796,2108-08-16 +3905351789241845882,1866-07-28 +-1339636982994067311,2008-12-03 +7297177530102477725,2125-08-26 +7297177530102477725,1926-04-12 +-5706981533666803767,1800-09-20 +6049335087268933751,2282-06-09 +3845554233155411208,2264-04-05 +8755921538765428593,1827-05-01 +-1339636982994067311,2000-06-20 +-2098090254092150988,1817-03-12 +3873405809071478736,2034-06-09 +2438535236662373438,1916-01-10 +5246983111579595707,2260-05-11 +-5706981533666803767,2151-06-09 +-614848861623872247,2112-11-09 +-327698348664467755,2222-10-15 +-2184423060953067642,1880-10-06 +434940853096155515,2275-02-08 +-4224290881682877258,2120-01-16 +-5754527700632192146,1958-07-15 +-4224290881682877258,2185-07-08 +-2098090254092150988,2219-12-23 +-7707546703881534780,2134-08-20 +214451696109242839,1977-01-04 +-2688622006344936758,2129-01-11 +7937120928560087303,2083-03-14 +-6784441713807772877,2054-06-17 +3873405809071478736,1918-11-20 +6049335087268933751,2086-12-17 +5252407779338300447,2039-03-10 diff --git data/files/fullouter_long_small_1b.txt data/files/fullouter_long_small_1b.txt new file mode 100644 index 0000000..7d45fe4 --- 
/dev/null +++ data/files/fullouter_long_small_1b.txt @@ -0,0 +1,72 @@ +2748,2298-06-20 21:01:24 +11232,2533-11-26 12:22:18 +\N,2124-05-07 15:01:19.021 +3198,2428-06-13 16:21:33.955 +-7624,2219-12-03 17:07:19 +24870,2752-12-26 12:32:23.03685163 +14865,2943-03-21 00:42:10.505 +-8624,2644-05-04 04:45:07.839 +-30059,2269-05-04 21:23:44.000339209 +14865,2079-10-06 16:54:35.117 +-8435,2834-12-06 16:38:18.901 +10553,2168-05-05 21:10:59.000152113 +-8624,2282-03-28 07:58:16 +-15361,2219-09-15 20:15:03.000169887 +-14172,1918-09-13 11:44:24.496926711 +26484,1919-03-04 07:32:37.519 +-14172,2355-01-14 23:23:34 +-24775,2920-08-06 15:58:28.261059449 +-23117,2037-01-05 21:52:30.685952759 +17125,2236-07-14 01:54:40.927230276 +21181,2253-03-12 11:55:48.332 +-7373,2662-10-28 12:07:02.000526564 +-8087,2550-06-26 23:57:42.588007617 +29407,2385-12-14 06:03:39.597 +21181,2434-02-20 00:46:29.633 +-14172,2809-06-07 02:10:58 +13598,2421-05-20 14:18:31.000264698 +2748,2759-02-13 18:04:36.000307355 +-22422,1949-03-13 00:07:53.075 +26484,2953-03-10 02:05:26.508953676 +4510,2777-03-24 03:44:28.000169723 +-24775,2035-03-26 08:11:23.375224153 +-30059,2713-10-13 09:28:49 +-20517,2774-06-23 12:04:06.5 +11232,2038-04-06 14:53:59 +32030,2101-09-09 07:35:05.145 +-29600,2333-11-02 15:06:30 +-30306,2619-05-24 10:35:58.000774018 +-7624,2289-08-28 00:14:34 +-4279,2470-08-12 11:21:14.000955747 +-4279,2214-09-10 03:53:06 +-26998,2428-12-26 07:53:45.96925825 +17125,2629-11-15 15:34:52 +-8087,2923-07-02 11:40:26.115 +2632,2561-12-15 15:42:27 +21436,2696-05-08 05:19:24.112 +\N,2971-08-07 12:02:11.000948152 +-7624,2623-03-20 03:18:45.00006465 +-26998,2926-07-18 09:02:46.077 +11232,2507-01-27 22:04:22.49661421 +-30059,2420-12-10 22:12:30 +-15427,2355-01-08 12:34:11.617 +3198,2223-04-14 13:20:49 +-19167,2319-08-26 11:07:11.268 +14865,2220-02-28 03:41:36 +-20517,2233-12-20 04:06:56.666522799 +-15427,2046-06-07 22:58:40.728 +2748,2862-04-20 13:12:39.482805897 +-8435,2642-02-07 11:45:04.353231638 +-19167,2230-12-22 20:25:39.000242111 +-15427,2023-11-09 19:31:21 +13598,2909-06-25 23:22:50 +21436,2526-09-22 23:44:55 +-15361,2434-08-13 20:37:07.000172979 +4510,2293-01-17 13:47:41.00001006 +-8624,2120-02-15 15:36:40.000758423 +-22422,2337-07-19 06:33:02.000353352 +-26998,2268-08-04 12:48:11.848006292 +-22422,2982-12-28 06:30:26.000883228 +\N,2933-06-20 11:48:09.000839488 +3198,2736-12-20 03:59:50.343550301 +-20824,2478-11-05 00:28:05 diff --git data/files/fullouter_long_small_1c.txt data/files/fullouter_long_small_1c.txt new file mode 100644 index 0000000..ff323d3 --- /dev/null +++ data/files/fullouter_long_small_1c.txt @@ -0,0 +1,81 @@ +-1093006502,-69.55665828 +452719211,83003.43722 +1242586043,71.1485 +-934092157,-7843850349.57130038 +294598722,-3542.6 +284554389,5.727146 +90660785,12590.288613 +-99948814,-38076694.3981 +466567142,-9763217822.129028 +1909136587,-8610.078036935181 +1242586043,-4 +\N,1.089120893565337 +1039864870,987601.57 +-466171792,0 +-1681455031,-6.4543 +1755897735,-39.965207 +1585021913,745222.66808954 +448130683,-4302.485366846491 +193709887,0.8 +-424713789,0.48 +1585021913,607.22747 +-1250662632,5454127198.951479 +294598722,-9377326244.444 +193709887,-19889.83 +1039864870,0.7 +1242586043,-749975924224.63 +-1250662632,-544.554649 +-1740848088,-9.157 +-369457052,7.7 +-369457052,560.11907883090455 +90660785,-4564.517185 +466567142,-58810.60586 +466567142,196.5785295398584 +1738753776,1525.280459649262 +1816559437,-1035.7009 +-1490239076,92253.232096 +1039864870,94.04 +560745412,678.25 +-466171792,4227.5344 
+1561921421,53050.55 +-99948814,-96386.438 +1519948464,152 +1719049112,-7888197 +-793950320,-16 +-466171792,69.9 +1738753776,-99817635066320.2416 +1091836730,0.02 +891262439,-0.04 +452719211,3020.2938930744636 +-2048404259,3939387044.1 +698032489,-330457.4292625839 +-1197550983,-0.5588796922 +-2123273881,-55.89198 +-2048404259,-0.3222960446251 +1585021913,-5762331.06697112 +1785750809,47443.115 +1909136587,181.07681535944 +1801735854,-1760956929364.267 +\N,4.26165227 +1801735854,-438541294.7 +150678276,-8278 +1479580778,92077343080.7 +1091836730,-5017.14 +193709887,-0.5663 +-1681455031,-11105.372477 +-1250662632,93104 +-1197550983,0.1 +\N,682070836.2649603 +-1197550983,71852.8338674412613 +1561921421,-5.405 +-1740848088,0.506394259 +150678276,15989394.8436 +-793950320,-0.1 +-1740848088,901.441 +-477147437,6 +-1264372462,0.883 +-2123273881,3.959 +-1264372462,-6993985240226 +-1264372462,-899 +-243940373,-97176129669.654953 +-243940373,-583.258 diff --git data/files/fullouter_long_small_1d.txt data/files/fullouter_long_small_1d.txt new file mode 100644 index 0000000..9778d3f --- /dev/null +++ data/files/fullouter_long_small_1d.txt @@ -0,0 +1,39 @@ +533298451 +1164387380 +1614287784 +1635405412 +-1912571616 +-894799664 +-1210744742 +-1014271154 +-747044796 +-1003639073 +436878811 +-1323620496 +-1379355738 +-1712018127 +246169862 +1431997749 +670834064 +1780951928 +-707688773 +1997943409 +1372592319 +-932176731 +162858059 +-683339273 +-497171161 +699863556 +1685473722 +41376947 +-1036083124 +1825107160 +-2038654700 +2119085509 +260588085 +-1792852276 +1831520491 +103640700 +\N +699007128 +1840266070 diff --git data/files/fullouter_multikey_big_1a.txt data/files/fullouter_multikey_big_1a.txt new file mode 100644 index 0000000..fe38c7b --- /dev/null +++ data/files/fullouter_multikey_big_1a.txt @@ -0,0 +1,13 @@ +22767,-1969080993 +-17582,-1730236061 +3556,\N +-17582,1082230084 +-17582,827141667 +1499,371855128 +-17582,9637312 +\N,1082230084 +-6131,-1969080993 +3556,-1969080993 +\N,\N +-18222,-1969080993 +-17582,267529350 diff --git data/files/fullouter_multikey_big_1a_nonull.txt data/files/fullouter_multikey_big_1a_nonull.txt new file mode 100644 index 0000000..40e84b0 --- /dev/null +++ data/files/fullouter_multikey_big_1a_nonull.txt @@ -0,0 +1,10 @@ +-17582,1082230084 +22767,-1969080993 +-17582,827141667 +-17582,-1730236061 +3556,-1969080993 +-6131,-1969080993 +-18222,-1969080993 +1499,371855128 +-17582,267529350 +-17582,9637312 diff --git data/files/fullouter_multikey_big_1b.txt data/files/fullouter_multikey_big_1b.txt new file mode 100644 index 0000000..40cfb9a --- /dev/null +++ data/files/fullouter_multikey_big_1b.txt @@ -0,0 +1,17 @@ +2061-12-19 22:10:32.000628309,21635,ANCO +\N,21635,ANCO +2686-05-23 07:46:46.565832918,13212,NCYBDW +2082-07-14 04:00:40.695380469,12556,NCYBDW +2188-06-04 15:03:14.963259704,9468,AAA +2608-02-23 23:44:02.546440891,26184,NCYBDW +2093-04-10 23:36:54.846,\N,\N +2898-10-01 22:27:02.000871113,10361,NCYBDW +2306-06-21 11:02:00.143124239,1446,\N +\N,-6909,\N +\N,\N,\N +2306-06-21 11:02:00.143124239,-6909,NCYBDW +2093-04-10 23:36:54.846,1446,GHZVPWFO +\N,\N,CCWYD +2686-05-23 07:46:46.565832918,\N,GHZVPWFO +2093-04-10 23:36:54.846,28996,Q +2299-11-15 16:41:30.401,-31077,NCYBDW diff --git data/files/fullouter_multikey_small_1a.txt data/files/fullouter_multikey_small_1a.txt new file mode 100644 index 0000000..4e0742c --- /dev/null +++ data/files/fullouter_multikey_small_1a.txt @@ -0,0 +1,92 @@ +23015,258882280 +23015,-276888585 +21186,-586336015 +-22311,-2055239583 
+3412,-1249487623 +\N,1082230084 +20156,-1618478138 +-17788,-738743861 +-24206,-1456409156 +30353,2044473567 +20969,-1995259010 +-23457,-63842445 +3412,-2081156563 +-6131,-1969080993 +23015,-252525791 +30353,1364268303 +23015,564751472 +15404,1078466156 +4586,-586336015 +-4117,-1386947816 +-26894,-63842445 +-17788,-1361776766 +-7386,-2112062470 +23015,-1893013623 +30353,1241923267 +-24206,641361618 +-28129,-2055239583 +-20125,-1995259010 +16166,931172175 +31443,-1968665833 +-28313,837320573 +11460,1078466156 +15061,-63842445 +13672,-63842445 +14400,-825652334 +-7386,100736776 +26944,-1995259010 +-11868,97203778 +12089,-63842445 +-28137,-63842445 +3412,1253976194 +-980,2009785365 +16696,-63842445 +-11868,930596435 +4902,1078466156 +-17582,267529350 +-12252,964377504 +20156,963883665 +-11868,1658440922 +4779,-1995259010 +-7386,-1635102480 +-28313,51228026 +-11868,1052120431 +-980,-270600267 +-20900,1078466156 +\N,\N +20156,1165375499 +30353,-1507157031 +3412,-1196037018 +22934,-1695419330 +30353,105613996 +-17788,-872691214 +-980,-333603940 +30353,-1011627089 +-11868,-3536499 +-2407,1078466156 +23015,-217613200 +-28313,-706104224 +-980,712692345 +-11868,1456809245 +-17788,528419995 +-11868,-915441041 +-980,628784462 +30353,-1007182618 +23015,-696928205 +-980,356970043 +23015,-893234501 +-980,-465544127 +-5734,1078466156 +-980,-801821285 +26738,-2055239583 +8177,-1995259010 +-11868,1318114822 +3890,1411429004 +-6061,-586336015 +3412,-2132472060 +-15212,-2055239583 +-12252,1956403781 +5957,-1995259010 +-1787,-63842445 +20156,1855042153 +-980,1310479628 diff --git data/files/fullouter_multikey_small_1a_nonull.txt data/files/fullouter_multikey_small_1a_nonull.txt new file mode 100644 index 0000000..2a8b9a1 --- /dev/null +++ data/files/fullouter_multikey_small_1a_nonull.txt @@ -0,0 +1,90 @@ +16696,-63842445 +4586,-586336015 +26738,-2055239583 +-17788,-738743861 +-28313,-706104224 +-23457,-63842445 +-20900,1078466156 +-12252,964377504 +-28313,51228026 +-11868,-3536499 +11460,1078466156 +26944,-1995259010 +20156,1855042153 +-11868,97203778 +15061,-63842445 +-17788,528419995 +-26894,-63842445 +-28313,837320573 +20156,963883665 +-15212,-2055239583 +5957,-1995259010 +30353,-1011627089 +3890,1411429004 +-980,-333603940 +13672,-63842445 +-980,628784462 +23015,-252525791 +-11868,1052120431 +-980,356970043 +23015,-217613200 +-6061,-586336015 +-5734,1078466156 +-11868,1318114822 +23015,258882280 +-2407,1078466156 +12089,-63842445 +3412,-2132472060 +-28129,-2055239583 +-980,-270600267 +16166,931172175 +-7386,100736776 +4902,1078466156 +20969,-1995259010 +22934,-1695419330 +3412,-1249487623 +3412,1253976194 +21186,-586336015 +8177,-1995259010 +-7386,-1635102480 +-11868,1456809245 +-20125,-1995259010 +-980,-801821285 +-980,1310479628 +23015,564751472 +23015,-893234501 +4779,-1995259010 +-980,2009785365 +-24206,641361618 +30353,-1507157031 +14400,-825652334 +3412,-2081156563 +20156,-1618478138 +31443,-1968665833 +-22311,-2055239583 +30353,1241923267 +-11868,930596435 +-17788,-1361776766 +-24206,-1456409156 +-7386,-2112062470 +30353,1364268303 +23015,-1893013623 +-17788,-872691214 +30353,2044473567 +-28137,-63842445 +30353,105613996 +-6131,-1969080993 +-17582,267529350 +23015,-276888585 +-12252,1956403781 +23015,-696928205 +-11868,1658440922 +-1787,-63842445 +-11868,-915441041 +-980,-465544127 +30353,-1007182618 +-980,712692345 +20156,1165375499 +3412,-1196037018 +15404,1078466156 +-4117,-1386947816 diff --git data/files/fullouter_multikey_small_1b.txt data/files/fullouter_multikey_small_1b.txt new file mode 
100644 index 0000000..b56a3f7 --- /dev/null +++ data/files/fullouter_multikey_small_1b.txt @@ -0,0 +1,118 @@ +2304-12-15 15:31:16,11101,YJCKKCR,-0.2 +2018-11-25 22:27:55.84,-12202,VBDBM,7506645.9537 +1957-03-06 09:57:31,-26373,NXLNNSO,2 +2332-06-14 07:02:42.32,-26373,XFFFDTQ,56845106806308.9 +2535-03-01 05:04:49.000525883,23663,ALIQKNXHE,-0.1665691 +2629-04-07 01:54:11,-6776,WGGFVFTW,6.8012851708 +2266-09-26 06:27:29.000284762,20223,EDYJJN,14 +2969-01-23 14:08:04.000667259,-18138,VDPN,8924831210.42768019 +2861-05-27 07:13:01.000848622,-19598,WKPXNLXS,29399 +2301-06-03 17:16:19,15332,ZVEUKC,0.5 +1980-09-13 19:57:15,\N,M,57650.7723 +2304-12-15 15:31:16,1301,T,-0.8 +2461-03-09 09:54:45.000982385,-16454,ZSMB,-991.43605 +2044-05-02 07:00:03.35,-8751,ZSMB,-453797242.029791752 +2409-09-23 10:33:27,2638,XSXR,-9926693851 +1941-10-16 02:19:36.000423663,-24459,AO,-821445414.4579712 +2512-10-06 03:03:03,-3465,VZQ,-49.51219 +2971-02-14 09:13:19,-16605,BVACIRP,-5.751278023 +2075-10-25 20:32:40.000792874,\N,\N,226612651968.36076 +2073-03-21 15:32:57.617920888,26425,MPRACIRYW,5 +2969-01-23 14:08:04.000667259,14500,WXLTRFQP,-23.8198 +2898-12-18 03:37:17,-24459,MHNBXPBM,14.23669356238481 +\N,\N,\N,-2207.3 +2391-01-17 15:28:37.00045143,16160,ZVEUKC,771355639420297.133 +2309-01-15 12:43:49,22821,ZMY,40.9 +2340-12-15 05:15:17.133588982,23663,HHTP,33383.8 +2969-01-23 14:08:04.000667259,-8913,UIMQ,9.178 +2145-10-15 06:58:42.831,2638,\N,-9784.82 +2888-05-08 08:36:55.182302102,5786,ZVEUKC,-56082455.033918 +2467-05-11 06:04:13.426693647,23196,EIBSDASR,-8.5548883801 +2829-06-04 08:01:47.836,22771,ZVEUKC,94317.75318 +2938-12-21 23:35:59.498,29362,ZMY,0.88 +2304-12-15 15:31:16,-13125,JFYW,6.086657 +2808-07-09 02:10:11.928498854,-19598,FHFX,0.3 +2083-06-07 09:35:19.383,-26373,MR,-394.0867 +2686-05-23 07:46:46.565832918,13212,NCYBDW,-917116793.4 +2969-01-23 14:08:04.000667259,-8913,UIMQ,-375994644577.315257 +2338-02-12 09:30:07,20223,CTH,-6154.763054 +2629-04-07 01:54:11,-6776,WGGFVFTW,41.77451507786646 +2242-08-04 07:51:46.905,20223,UCYXACQ,37.7288 +2637-03-12 22:25:46.385,-12923,PPTJPFR,5.4 +2304-12-15 15:31:16,8650,RLNO,0.71351747335 +2688-02-06 20:58:42.000947837,20223,PAIY,67661.735 +\N,\N,\N,-2.4 +2512-10-06 03:03:03,-3465,VZQ,0.4458 +2960-04-12 07:03:42.000366651,20340,CYZYUNSF,-96.3 +2461-03-09 09:54:45.000982385,-16454,ZSMB,-9575827.55396 +2512-10-06 03:03:03,1560,X,-922.6951584107 +2396-04-06 15:39:02.404013577,29661,ZSMB,0.76718326 +2409-09-23 10:33:27,2638,XSXR,0.4 +2969-01-23 14:08:04.000667259,6689,TFGVOGPJF,-0.01 +2333-07-28 09:59:26,23196,RKSK,37872288434740893.5 +2409-09-23 10:33:27,2638,XSXR,-162.95 +2357-05-08 07:09:09.000482799,6226,ZSMB,-472 +2304-12-15 15:31:16,15090,G,-4319470286240016.3 +2304-12-15 15:31:16,1301,T,61.302 +2105-01-04 16:27:45,23100,ZSMB,-83.2328 +2242-08-04 07:51:46.905,20223,UCYXACQ,-0.26149 +2637-03-12 22:25:46.385,-17786,HYEGQ,-84.169614329419 +1931-12-04 11:13:47.269597392,23196,HVJCQMTQL,-9697532.8994 +2897-08-10 15:21:47.09,23663,XYUVBED,6370 +2888-05-08 08:36:55.182302102,5786,ZVEUKC,57.62175257788037 +2145-10-15 06:58:42.831,2638,UANGISEXR,-5996.306 +2462-12-16 23:11:32.633305644,-26373,CB,67.41799 +2396-04-06 15:39:02.404013577,29661,ZSMB,-5151598.347 +2304-12-15 15:31:16,15090,G,975 +2512-10-06 03:03:03,32099,ARNZ,-0.41 +2188-06-04 15:03:14.963259704,9468,AAA,2.75496352 +2512-10-06 03:03:03,1560,X,761196.522 +2304-12-15 15:31:16,1301,T,2720.8 +1919-06-20 00:16:50.611028595,20223,ZKBC,-23 +2897-08-10 15:21:47.09,23663,XYUVBED,51.7323303273 +2086-04-09 
00:03:10,20223,THXNJGFFV,-85184687349898.892 +2238-05-17 19:27:25.519,20223,KQCM,-0.01095 +2086-04-09 00:03:10,20223,THXNJGFFV,482.5383411359219 +2480-10-02 09:31:37.000770961,-26373,NBN,-5875.5197252 +2086-04-09 00:03:10,20223,THXNJGFFV,0.4396861 +2759-11-26 22:19:55.410967136,-27454,ZMY,60.6025797 +2083-06-07 09:35:19.383,-26373,MR,67892053.02376094 +2882-05-20 07:21:25.221299462,23196,U,-9951044 +2971-02-14 09:13:19,-16605,BVACIRP,-27394351.3 +2512-10-06 03:03:03,24313,QBHUG,-8423.151573236 +2882-05-20 07:21:25.221299462,23196,U,-4244.926206619 +1905-04-20 13:42:25.000469776,2638,KAUUFF,7 +2410-05-03 13:44:56,2638,PHOR,-769088.176482 +2668-06-25 07:12:37.000970744,2638,TJE,-2.7796827 +2969-01-23 14:08:04.000667259,-32485,AGEPWWLJF,-48431309405.652522 +2410-05-03 13:44:56,2638,PHOR,93262.914526611 +2512-10-06 03:03:03,13195,CRJ,14 +2018-11-25 22:27:55.84,-12202,VBDBM,98790.713907420831 +2304-12-15 15:31:16,8650,RLNO,-0.4355 +2071-07-21 20:02:32.000250697,2638,NRUV,-66198.351092 +2525-05-12 15:59:35,-24459,SAVRGA,53106747151.8633 +2637-03-12 22:25:46.385,21841,CXTI,749563668434009.65 +2018-11-25 22:27:55.84,-22419,LOTLS,342.3726040228584 +2637-03-12 22:25:46.385,21841,CXTI,7362887891522.3782 +2038-10-12 09:15:33.000539653,-19598,YKNIAJW,-642807895924.66 +2957-05-07 10:41:46,20223,OWQT,-586953.153681 +2304-12-15 15:31:16,11101,YJCKKCR,1279917802.42 +2355-09-23 19:52:34.638084141,-19598,H,92.15 +2960-04-12 07:03:42.000366651,20340,CYZYUNSF,2.1577659 +2355-09-23 19:52:34.638084141,-19598,H,74179461.880493 +2969-01-23 14:08:04.000667259,-8913,UIMQ,-81 +\N,-12914,ZVEUKC,221 +2743-12-27 05:16:19.000573579,-12914,ZVEUKC,-811984611.5178497 +1957-02-01 14:00:29.000548421,-16085,ZVEUKC,-2312.8149 +2201-07-05 17:22:06.084206844,-24459,UBGT,1.5069483282 +2461-03-09 09:54:45.000982385,-16454,ZSMB,8694.89 +2169-04-02 06:30:32,23855,PDVQATOS,-1515597428 +2304-12-15 15:31:16,30285,GSJPSIYOU,0.2 +2913-07-17 15:06:58.041,-10206,\N,-0.2 +2169-04-02 06:30:32,23855,PDVQATOS,-4016.9608 +2759-11-26 22:19:55.410967136,-27454,ZMY,368 +2073-03-21 15:32:57.617920888,26425,MPRACIRYW,726945733.4193 +2304-12-15 15:31:16,11101,YJCKKCR,-0.5 +2462-12-16 23:11:32.633305644,-26373,CB,-582687 +2357-05-08 07:09:09.000482799,6226,ZSMB,-32.46 +2304-12-15 15:31:16,12587,OPW,-4.59489504 diff --git data/files/fullouter_string_big_1a.txt data/files/fullouter_string_big_1a.txt new file mode 100644 index 0000000..1cbcd05 --- /dev/null +++ data/files/fullouter_string_big_1a.txt @@ -0,0 +1,13 @@ +FTWURVH +QNCYBDW +UA +WXHJ +\N +WXHJ +PXLD +WXHJ +PXLD +WXHJ +WXHJ +MXGDMBD +PXLD diff --git data/files/fullouter_string_big_1a_nonull.txt data/files/fullouter_string_big_1a_nonull.txt new file mode 100644 index 0000000..a6566f2 --- /dev/null +++ data/files/fullouter_string_big_1a_nonull.txt @@ -0,0 +1,12 @@ +WXHJ +WXHJ +FTWURVH +MXGDMBD +UA +WXHJ +QNCYBDW +PXLD +PXLD +WXHJ +PXLD +WXHJ diff --git data/files/fullouter_string_big_1a_old.txt data/files/fullouter_string_big_1a_old.txt new file mode 100644 index 0000000..1fa51ad --- /dev/null +++ data/files/fullouter_string_big_1a_old.txt @@ -0,0 +1,13 @@ +WXHJ +WXHJ +WXHJ +WXHJ +WXHJ +QNCYBDW +PXLD +PXLD +PXLD +UA +\N +FTWURVH +MXGDMBD diff --git data/files/fullouter_string_small_1a.txt data/files/fullouter_string_small_1a.txt new file mode 100644 index 0000000..f223da0 --- /dev/null +++ data/files/fullouter_string_small_1a.txt @@ -0,0 +1,38 @@ +BDBMW,2278-04-27,2101-02-21 08:53:34.692 +FROPIK,2023-02-28,2467-05-11 06:04:13.426693647 +GOYJHW,1976-03-06,2805-07-10 10:51:57.00083302 
+MXGDMBD,1880-11-01,2765-10-06 13:28:17.000688592 +CQMTQLI,2031-09-13,1927-02-13 08:39:25.000919094 +,1985-01-22,2111-01-10 15:44:28 +IOQIDQBHU,2198-02-08,2073-03-21 15:32:57.617920888 +GSJPSIYOU,1948-07-17,2006-09-24 16:01:24.000239251 +\N,1865-11-08,2893-04-07 07:36:12 +BEP,2206-08-10,2331-10-09 10:59:51 +NADANUQMW,2037-10-19,2320-04-26 18:50:25.000426922 +\N,2250-04-22,2548-03-21 08:23:13.133573801 +ATZJTPECF,1829-10-16,2357-05-08 07:09:09.000482799 +IWEZJHKE,\N,\N +AARNZRVZQ,2002-10-23,2525-05-12 15:59:35 +BEP,2141-02-19,2521-06-09 01:20:07.121 +AARNZRVZQ,2000-11-13,2309-06-05 19:54:13 +LOTLS,1957-11-09,2092-06-07 06:42:30.000538454 +FROPIK,2124-10-01,2974-07-06 12:05:08.000146048 +KL,1980-09-22,2073-08-25 11:51:10.318 +\N,1915-02-22,2554-10-27 09:34:30 +WNGFTTY,1843-06-10,2411-01-28 20:03:59 +VNRXWQ,1883-02-06,2287-07-17 16:46:58.287 +QTSRKSKB,2144-01-13,2627-12-20 03:38:53.000389266 +GOYJHW,1959-04-27,\N +LOTLS,2099-08-04,2181-01-25 01:04:25.000030055 +CQMTQLI,2090-11-13,2693-03-17 16:19:55.82 +VNRXWQ,2276-11-16,2072-08-16 17:45:47.48349887 +LOTLS,2126-09-16,1977-12-15 15:28:56 +FTWURVH,1976-03-10,2683-11-22 13:07:04.66673556 +,2021-02-21,2802-04-21 18:48:18.5933838 +ZNOUDCR,\N,1988-04-23 08:40:21 +FROPIK,2214-02-09,1949-08-18 17:14:38.000703738 +SDA,2196-04-12,2462-10-26 19:28:12.733 +WNGFTTY,2251-08-16,2649-12-21 18:30:42.498 +GOYJHW,1993-04-07,1950-05-04 09:28:22.000114784 +FYW,1807-03-20,2305-08-17 01:32:44 +ATZJTPECF,2217-10-22,2808-10-20 16:01:24.558 diff --git data/files/fullouter_string_small_1a_nonull.txt data/files/fullouter_string_small_1a_nonull.txt new file mode 100644 index 0000000..6b97ef4 --- /dev/null +++ data/files/fullouter_string_small_1a_nonull.txt @@ -0,0 +1,35 @@ +LOTLS,2126-09-16,1977-12-15 15:28:56 +MXGDMBD,1880-11-01,2765-10-06 13:28:17.000688592 +WNGFTTY,2251-08-16,2649-12-21 18:30:42.498 +QTSRKSKB,2144-01-13,2627-12-20 03:38:53.000389266 +AARNZRVZQ,2002-10-23,2525-05-12 15:59:35 +BEP,2141-02-19,2521-06-09 01:20:07.121 +ZNOUDCR,\N,1988-04-23 08:40:21 +FROPIK,2023-02-28,2467-05-11 06:04:13.426693647 +GOYJHW,1993-04-07,1950-05-04 09:28:22.000114784 +CQMTQLI,2090-11-13,2693-03-17 16:19:55.82 +BDBMW,2278-04-27,2101-02-21 08:53:34.692 +AARNZRVZQ,2000-11-13,2309-06-05 19:54:13 +FYW,1807-03-20,2305-08-17 01:32:44 +,2021-02-21,2802-04-21 18:48:18.5933838 +VNRXWQ,1883-02-06,2287-07-17 16:46:58.287 +FROPIK,2124-10-01,2974-07-06 12:05:08.000146048 +LOTLS,2099-08-04,2181-01-25 01:04:25.000030055 +BEP,2206-08-10,2331-10-09 10:59:51 +WNGFTTY,1843-06-10,2411-01-28 20:03:59 +LOTLS,1957-11-09,2092-06-07 06:42:30.000538454 +CQMTQLI,2031-09-13,1927-02-13 08:39:25.000919094 +GOYJHW,1976-03-06,2805-07-10 10:51:57.00083302 +,1985-01-22,2111-01-10 15:44:28 +SDA,2196-04-12,2462-10-26 19:28:12.733 +ATZJTPECF,1829-10-16,2357-05-08 07:09:09.000482799 +GOYJHW,1959-04-27,\N +FTWURVH,1976-03-10,2683-11-22 13:07:04.66673556 +KL,1980-09-22,2073-08-25 11:51:10.318 +ATZJTPECF,2217-10-22,2808-10-20 16:01:24.558 +NADANUQMW,2037-10-19,2320-04-26 18:50:25.000426922 +FROPIK,2214-02-09,1949-08-18 17:14:38.000703738 +IWEZJHKE,\N,\N +GSJPSIYOU,1948-07-17,2006-09-24 16:01:24.000239251 +IOQIDQBHU,2198-02-08,2073-03-21 15:32:57.617920888 +VNRXWQ,2276-11-16,2072-08-16 17:45:47.48349887 diff --git data/files/fullouter_string_small_1a_old.txt data/files/fullouter_string_small_1a_old.txt new file mode 100644 index 0000000..505c403 --- /dev/null +++ data/files/fullouter_string_small_1a_old.txt @@ -0,0 +1,38 @@ +,2021-02-21,2802-04-21 18:48:18.5933838 +,1985-01-22,2111-01-10 15:44:28 
+VNRXWQ,1883-02-06,2287-07-17 16:46:58.287 +VNRXWQ,2276-11-16,2072-08-16 17:45:47.48349887 +KL,1980-09-22,2073-08-25 11:51:10.318 +FYW,1807-03-20,2305-08-17 01:32:44 +WNGFTTY,2251-08-16,2649-12-21 18:30:42.498 +WNGFTTY,1843-06-10,2411-01-28 20:03:59 +FTWURVH,1976-03-10,2683-11-22 13:07:04.66673556 +CQMTQLI,2031-09-13,1927-02-13 08:39:25.000919094 +CQMTQLI,2090-11-13,2693-03-17 16:19:55.82 +BEP,2141-02-19,2521-06-09 01:20:07.121 +BEP,2206-08-10,2331-10-09 10:59:51 +FROPIK,2023-02-28,2467-05-11 06:04:13.426693647 +FROPIK,2214-02-09,1949-08-18 17:14:38.000703738 +FROPIK,2124-10-01,2974-07-06 12:05:08.000146048 +SDA,2196-04-12,2462-10-26 19:28:12.733 +ATZJTPECF,2217-10-22,2808-10-20 16:01:24.558 +ATZJTPECF,1829-10-16,2357-05-08 07:09:09.000482799 +MXGDMBD,1880-11-01,2765-10-06 13:28:17.000688592 +IWEZJHKE,\N,\N +NADANUQMW,2037-10-19,2320-04-26 18:50:25.000426922 +GOYJHW,1993-04-07,1950-05-04 09:28:22.000114784 +GOYJHW,1976-03-06,2805-07-10 10:51:57.00083302 +GOYJHW,1959-04-27,\N +QTSRKSKB,2144-01-13,2627-12-20 03:38:53.000389266 +IOQIDQBHU,2198-02-08,2073-03-21 15:32:57.617920888 +ZNOUDCR,\N,1988-04-23 08:40:21 +AARNZRVZQ,2000-11-13,2309-06-05 19:54:13 +AARNZRVZQ,2002-10-23,2525-05-12 15:59:35 +BDBMW,2278-04-27,2101-02-21 08:53:34.692 +\N,1865-11-08,2893-04-07 07:36:12 +\N,2250-04-22,2548-03-21 08:23:13.133573801 +\N,1915-02-22,2554-10-27 09:34:30 +LOTLS,1957-11-09,2092-06-07 06:42:30.000538454 +LOTLS,2126-09-16,1977-12-15 15:28:56 +LOTLS,2099-08-04,2181-01-25 01:04:25.000030055 +GSJPSIYOU,1948-07-17,2006-09-24 16:01:24.000239251 diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java index af446db..7c86bcf 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java @@ -19,6 +19,7 @@ import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.tez.ObjectCache; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountVectorCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -26,6 +27,8 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestData; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.CreateMapJoinResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.MapJoinPlanVariation; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateUtil; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; @@ -74,23 +77,24 @@ public void bench() throws Exception { } protected void setupMapJoin(HiveConf hiveConf, long seed, int rowCount, - VectorMapJoinVariation vectorMapJoinVariation, MapJoinTestImplementation mapJoinImplementation, - String[] bigTableColumnNames, TypeInfo[] bigTableTypeInfos, int[] bigTableKeyColumnNums, - String[] smallTableValueColumnNames, 
TypeInfo[] smallTableValueTypeInfos, - int[] bigTableRetainColumnNums, - int[] smallTableRetainKeyColumnNums, int[] smallTableRetainValueColumnNums, - SmallTableGenerationParameters smallTableGenerationParameters) throws Exception { + VectorMapJoinVariation vectorMapJoinVariation, MapJoinTestImplementation mapJoinImplementation, + String[] bigTableColumnNames, TypeInfo[] bigTableTypeInfos, + int[] bigTableKeyColumnNums, + String[] smallTableValueColumnNames, TypeInfo[] smallTableValueTypeInfos, + int[] bigTableRetainColumnNums, + int[] smallTableRetainKeyColumnNums, int[] smallTableRetainValueColumnNums, + SmallTableGenerationParameters smallTableGenerationParameters) throws Exception { this.vectorMapJoinVariation = vectorMapJoinVariation; this.mapJoinImplementation = mapJoinImplementation; testDesc = new MapJoinTestDescription( hiveConf, vectorMapJoinVariation, - bigTableColumnNames, bigTableTypeInfos, + bigTableTypeInfos, bigTableKeyColumnNums, - smallTableValueColumnNames, smallTableValueTypeInfos, - bigTableRetainColumnNums, - smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, - smallTableGenerationParameters); + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + MapJoinPlanVariation.SHARED_SMALL_TABLE); // Prepare data. Good for ANY implementation variation. testData = new MapJoinTestData(rowCount, testDesc, seed, seed * 10); @@ -109,7 +113,7 @@ protected void setupMapJoin(HiveConf hiveConf, long seed, int rowCount, if (!isVectorOutput) { bigTableRows = VectorBatchGenerateUtil.generateRowObjectArray( - testDesc.bigTableKeyTypeInfos, testData.getBigTableBatchStream(), + testDesc.bigTableTypeInfos, testData.getBigTableBatchStream(), testData.getBigTableBatch(), testDesc.outputObjectInspectors); } else { @@ -141,9 +145,20 @@ protected static MapJoinOperator setupBenchmarkImplementation( (!isVectorOutput ? new CountCollectorTestOperator() : new CountVectorCollectorTestOperator()); - MapJoinOperator operator = + // UNDONE: We need to plumb down shareMapJoinTableContainer.... + CreateMapJoinResult createMapJoinResult = MapJoinTestConfig.createMapJoinImplementation( - mapJoinImplementation, testDesc, testCollectorOperator, testData, mapJoinDesc); + mapJoinImplementation, testDesc, testData, mapJoinDesc, + /* shareMapJoinTableContainer */ null); + MapJoinOperator operator = createMapJoinResult.mapJoinOperator; + MapJoinTableContainer mapJoinTableContainer = createMapJoinResult.mapJoinTableContainer; + + // Invoke initializeOp methods. + operator.initialize(testDesc.hiveConf, testDesc.inputObjectInspectors); + + // Fixup the mapJoinTables. 
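+    // (Position 1 is assumed to be the lone small table slot here; the benchmark joins are 2-way with the big table at position 0.)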
+ operator.setTestMapJoinTableContainer(1, mapJoinTableContainer, null); + return operator; } diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java index c9da92a..aa88297 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java @@ -59,7 +59,8 @@ public void doSetup(VectorMapJoinVariation vectorMapJoinVariation, setupMapJoin(hiveConf, seed, rowCount, vectorMapJoinVariation, mapJoinImplementation, - bigTableColumnNames, bigTableTypeInfos, bigTableKeyColumnNums, + bigTableColumnNames, bigTableTypeInfos, + bigTableKeyColumnNums, smallTableValueColumnNames, smallTableValueTypeInfos, bigTableRetainColumnNums, smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java index a6b4719..60b2890 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java @@ -57,7 +57,8 @@ public void doSetup(VectorMapJoinVariation vectorMapJoinVariation, setupMapJoin(hiveConf, seed, rowCount, vectorMapJoinVariation, mapJoinImplementation, - bigTableColumnNames, bigTableTypeInfos, bigTableKeyColumnNums, + bigTableColumnNames, bigTableTypeInfos, + bigTableKeyColumnNums, smallTableValueColumnNames, smallTableValueTypeInfos, bigTableRetainColumnNums, smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java index 1b31038..937ede1 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java @@ -57,7 +57,8 @@ public void doSetup(VectorMapJoinVariation vectorMapJoinVariation, setupMapJoin(hiveConf, seed, rowCount, vectorMapJoinVariation, mapJoinImplementation, - bigTableColumnNames, bigTableTypeInfos, bigTableKeyColumnNums, + bigTableColumnNames, bigTableTypeInfos, + bigTableKeyColumnNums, smallTableValueColumnNames, smallTableValueTypeInfos, bigTableRetainColumnNums, smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index f513fe5..2733db6 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -559,6 +559,7 @@ minillaplocal.query.files=\ explainanalyze_2.q,\ explainuser_1.q,\ explainuser_4.q,\ + fullouter_mapjoin_1_optimized.q,\ groupby2.q,\ groupby_groupingset_bug.q,\ hybridgrace_hashjoin_1.q,\ @@ -730,6 +731,10 @@ minillaplocal.query.files=\ vector_complex_join.q,\ vector_decimal_2.q,\ vector_decimal_udf.q,\ + vector_full_outer_join.q,\ + 
vector_fullouter_mapjoin_1_fast.q,\ + vector_fullouter_mapjoin_1_optimized.q,\ + vector_fullouter_mapjoin_1_optimized_passthru.q,\ vector_groupby_cube1.q,\ vector_groupby_grouping_id1.q,\ vector_groupby_grouping_id2.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java index 02a67cb..3762ee5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java @@ -790,7 +790,16 @@ private boolean hasRightPairForLeft(int left, int right) { } private boolean hasAnyFiltered(int alias, List row) { - return row == dummyObj[alias] || hasFilter(alias) && JoinUtil.hasAnyFiltered(getFilterTag(row)); + if (row == dummyObj[alias]) { + return true; + } + if (hasFilter(alias) && row != null) { + ShortWritable shortWritable = (ShortWritable) row.get(row.size() - 1); + if (shortWritable != null) { + return JoinUtil.hasAnyFiltered(shortWritable.get()); + } + } + return false; } protected final boolean hasFilter(int alias) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java index 0a6e17a..931e78e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java @@ -668,6 +668,73 @@ private JSONObject outputPlan(Object work, return outputPlan(work, out, extended, jsonOutput, indent, ""); } + private boolean isInvokeVectorization(Vectorization vectorization) { + + boolean invokeFlag = true; // Assume. + + // The EXPLAIN VECTORIZATION option was specified. + final boolean desireOnly = this.work.isVectorizationOnly(); + final VectorizationDetailLevel desiredVecDetailLevel = + this.work.isVectorizationDetailLevel(); + + switch (vectorization) { + case NON_VECTORIZED: + // Display all non-vectorized leaf objects unless ONLY. + if (desireOnly) { + invokeFlag = false; + } + break; + case SUMMARY: + case OPERATOR: + case EXPRESSION: + case DETAIL: + if (vectorization.rank < desiredVecDetailLevel.rank) { + // This detail not desired. + invokeFlag = false; + } + break; + case SUMMARY_PATH: + case OPERATOR_PATH: + if (desireOnly) { + if (vectorization.rank < desiredVecDetailLevel.rank) { + // Suppress headers and all objects below. + invokeFlag = false; + } + } + break; + default: + throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization); + } + + return invokeFlag; + } + + private boolean isInvokeNonVectorization(Vectorization vectorization) { + + boolean invokeFlag = true; // Assume. + + // Do not display vectorization objects. + switch (vectorization) { + case SUMMARY: + case OPERATOR: + case EXPRESSION: + case DETAIL: + invokeFlag = false; + break; + case NON_VECTORIZED: + // No action. + break; + case SUMMARY_PATH: + case OPERATOR_PATH: + // Always include headers since they contain non-vectorized objects, too. 
+ break; + default: + throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization); + } + + return invokeFlag; + } + @VisibleForTesting JSONObject outputPlan(Object work, PrintStream out, boolean extended, boolean jsonOutput, int indent, String appendToHeader) throws Exception { @@ -689,65 +756,17 @@ JSONObject outputPlan(Object work, PrintStream out, if (extended) { invokeFlag = Level.EXTENDED.in(xpl_note.explainLevels()); } else { - invokeFlag = Level.DEFAULT.in(xpl_note.explainLevels()); + invokeFlag = + Level.DEFAULT.in(xpl_note.explainLevels()) || + (this.work.isDebug() && Level.DEBUG.in(xpl_note.explainLevels())); } } if (invokeFlag) { Vectorization vectorization = xpl_note.vectorization(); if (this.work != null && this.work.isVectorization()) { - - // The EXPLAIN VECTORIZATION option was specified. - final boolean desireOnly = this.work.isVectorizationOnly(); - final VectorizationDetailLevel desiredVecDetailLevel = - this.work.isVectorizationDetailLevel(); - - switch (vectorization) { - case NON_VECTORIZED: - // Display all non-vectorized leaf objects unless ONLY. - if (desireOnly) { - invokeFlag = false; - } - break; - case SUMMARY: - case OPERATOR: - case EXPRESSION: - case DETAIL: - if (vectorization.rank < desiredVecDetailLevel.rank) { - // This detail not desired. - invokeFlag = false; - } - break; - case SUMMARY_PATH: - case OPERATOR_PATH: - if (desireOnly) { - if (vectorization.rank < desiredVecDetailLevel.rank) { - // Suppress headers and all objects below. - invokeFlag = false; - } - } - break; - default: - throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization); - } + invokeFlag = isInvokeVectorization(vectorization); } else { - // Do not display vectorization objects. - switch (vectorization) { - case SUMMARY: - case OPERATOR: - case EXPRESSION: - case DETAIL: - invokeFlag = false; - break; - case NON_VECTORIZED: - // No action. - break; - case SUMMARY_PATH: - case OPERATOR_PATH: - // Always include headers since they contain non-vectorized objects, too. - break; - default: - throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization); - } + invokeFlag = isInvokeNonVectorization(vectorization); } } if (invokeFlag) { @@ -825,64 +844,18 @@ JSONObject outputPlan(Object work, PrintStream out, if (extended) { invokeFlag = Level.EXTENDED.in(xpl_note.explainLevels()); } else { - invokeFlag = Level.DEFAULT.in(xpl_note.explainLevels()); + invokeFlag = + Level.DEFAULT.in(xpl_note.explainLevels()) || + (this.work.isDebug() && Level.DEBUG.in(xpl_note.explainLevels())); } } if (invokeFlag) { Vectorization vectorization = xpl_note.vectorization(); - if (this.work != null && this.work.isVectorization()) { - - // The EXPLAIN VECTORIZATION option was specified. - final boolean desireOnly = this.work.isVectorizationOnly(); - final VectorizationDetailLevel desiredVecDetailLevel = - this.work.isVectorizationDetailLevel(); - - switch (vectorization) { - case NON_VECTORIZED: - // Display all non-vectorized leaf objects unless ONLY. - if (desireOnly) { - invokeFlag = false; - } - break; - case SUMMARY: - case OPERATOR: - case EXPRESSION: - case DETAIL: - if (vectorization.rank < desiredVecDetailLevel.rank) { - // This detail not desired. - invokeFlag = false; - } - break; - case SUMMARY_PATH: - case OPERATOR_PATH: - if (desireOnly) { - if (vectorization.rank < desiredVecDetailLevel.rank) { - // Suppress headers and all objects below. 
- invokeFlag = false; - } - } - break; - default: - throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization); - } - } else { - // Do not display vectorization objects. - switch (vectorization) { - case SUMMARY: - case OPERATOR: - case EXPRESSION: - case DETAIL: - invokeFlag = false; - break; - case NON_VECTORIZED: - // No action. - break; - case SUMMARY_PATH: - case OPERATOR_PATH: - // Always include headers since they contain non-vectorized objects, too. - break; - default: - throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization); + if (invokeFlag) { + if (this.work != null && this.work.isVectorization()) { + invokeFlag = isInvokeVectorization(vectorization); + } else { + invokeFlag = isInvokeNonVectorization(vectorization); } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java index a914ce3..1aae142 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/JoinUtil.java @@ -140,7 +140,7 @@ public static int populateJoinKeyValue(List[] outMap, if (key == (byte) posBigTableAlias) { valueFields.add(null); } else { - valueFields.add(ExprNodeEvaluatorFactory.get(expr, conf)); + valueFields.add(expr == null ? null : ExprNodeEvaluatorFactory.get(expr, conf)); } } outMap[key] = valueFields; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java index f45a012..51389f9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java @@ -27,6 +27,7 @@ import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; +import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.conf.Constants; @@ -41,12 +42,16 @@ import org.apache.hadoop.hive.ql.exec.persistence.KeyValueContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer.KeyValueHelper; +import org.apache.hadoop.hive.ql.exec.persistence.AbstractRowContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinRowContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.ReusableGetAdaptorDirectAccess; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.persistence.ObjectContainer; import org.apache.hadoop.hive.ql.exec.persistence.UnwrapRowContainer; import org.apache.hadoop.hive.ql.exec.spark.SparkUtilities; @@ -66,7 +71,9 @@ import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; import org.apache.hive.common.util.ReflectionUtil; @@ -74,8 +81,8 @@ import org.slf4j.LoggerFactory; import com.google.common.annotations.VisibleForTesting; - import com.esotericsoftware.kryo.KryoException; +import com.google.common.base.Preconditions; /** * Map side Join operator implementation. @@ -105,6 +112,31 @@ protected HybridHashTableContainer firstSmallTable; // The first small table; // Only this table has spilled big table rows + /* + * FULL OUTER MapJoin members. + */ + protected transient boolean isFullOuterMapJoin; // Are we doing a FULL OUTER MapJoin? + protected transient boolean isFullOuterForwardKeysToIntersect; + // Was the MapJoin planned for Shared-Memory + // (as opposed to a Dynamic Partition Hash Join) and + // we need to forward first-time key matches + // to the FULL OUTER INTERSECT Reducer? + protected transient boolean isFullOuterIntersect; + // Is this the FULL OUTER INTERSECT Reducer's + // MapJoin? + + protected transient int fullOuterBigTableRetainSize; + // The number of Big Table columns being + // retained in the output result for + // FULL OUTER MapJoin. + + /* + * Small Table key match tracking used for FULL OUTER MapJoin. Otherwise, null. + * Since the Small Table hash table can be shared, we need this non-shared private object for + * our (operator private) key match tracking. + */ + protected transient MatchTracker matchTracker; + protected transient boolean isTestingNoHashTableLoad; // Only used in bucket map join. private transient int numBuckets = -1; @@ -177,6 +209,8 @@ protected void initializeOp(Configuration hconf) throws HiveException { hybridMapJoinLeftover = false; firstSmallTable = null; + doFullOuterMapJoinInit(); + generateMapMetaData(); isTestingNoHashTableLoad = HiveConf.getBoolVar(hconf, @@ -252,6 +286,37 @@ protected void completeInitializationOp(Object[] os) throws HiveException { } } + /* + * Do initialization for FULL OUTER MapJoin. + * + * Currently, we do not support FULL OUTER MapJoin for N-way. + * + * When a Shared-Memory (i.e. instead of a Dynamic Partition Hash Join) is planned, we have a + * FULL OUTER INTERSECT Reducer and MapJoin. On the FULL OUTER MapJoin side, we need to forward + * first-time key matches to INTERSECT using an auxiliary child operator feature. + */ + private void doFullOuterMapJoinInit() { + + // This will be set during the first process call or during closeOp if no rows processed. + matchTracker = null; + + isFullOuterMapJoin = (condn.length == 1 && condn[0].getType() == JoinDesc.FULL_OUTER_JOIN); + if (isFullOuterMapJoin) { + fullOuterBigTableRetainSize = conf.getRetainList().get(posBigTable).size(); + isFullOuterForwardKeysToIntersect = !conf.isDynamicPartitionHashJoin(); + if (isFullOuterForwardKeysToIntersect) { + + // The auxiliary forward sends first-time match keys to the FULL OUTER INTERSECT MapJoin + // operator. Big assumption here: it is the 2nd child operator.
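+        // (Per the Operator.java hunk below, forward() skips the child at auxiliaryChildIndex, so that child only receives rows through forwardAuxiliary().)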
+ auxiliaryChildIndex = 1; + } + isFullOuterIntersect = conf.isFullOuterIntersect(); + } else { + isFullOuterForwardKeysToIntersect = false; + isFullOuterIntersect = false; + } + } + @VisibleForTesting public void setTestMapJoinTableContainer(int posSmallTable, MapJoinTableContainer testMapJoinTableContainer, @@ -415,6 +480,40 @@ public void cleanUpInputFileChangedOp() throws HiveException { return dest.setFromRow(row, joinKeys[alias], joinKeysObjectInspectors[alias]); } + /* + * This variation is for FULL OUTER MapJoin. It does key match tracking only if the key has + * no NULLs. + */ + protected JoinUtil.JoinResult setMapJoinKeyNoNulls( + ReusableGetAdaptor dest, Object row, byte alias, MatchTracker matchTracker) + throws HiveException { + return dest.setFromRowNoNulls(row, joinKeys[alias], joinKeysObjectInspectors[alias], matchTracker); + } + + /* + * This variation is used by FULL OUTER INTERSECT MapJoin. It does key match tracking for + * intersect purposes but does not return Small Table values. + */ + protected boolean setMapJoinKeyNoResult( + ReusableGetAdaptor dest, Object row, byte alias, MatchTracker matchTracker) + throws HiveException { + return dest.setFromRowNoResult( + row, joinKeys[alias], joinKeysObjectInspectors[alias], matchTracker); + } + + /* + * FULL OUTER MapJoin: When a Shared-Memory (i.e. instead of a Dynamic Partition Hash Join) is + * planned, we use this method to forward first-time key matches to the INTERSECT Reducer. + */ + protected void forwardFirstTimeMatchToFullOuterIntersect( + Object firstTimeMatchRow, ObjectInspector outputOI) + throws HiveException { + Object standardFirstTimeMatchRow = + ObjectInspectorUtils.copyToStandardObject( + firstTimeMatchRow, inputObjInspectors[posBigTable], ObjectInspectorCopyOption.WRITABLE); + forwardAuxiliary(standardFirstTimeMatchRow, outputOI); + } + protected MapJoinKey getRefKey(byte alias) { // We assume that since we are joining on the same key, all tables would have either // optimized or non-optimized key; hence, we can pass any key in any table as reference. @@ -437,6 +536,10 @@ public void process(Object row, int tag) throws HiveException { for (byte pos = 0; pos < order.length; pos++) { if (pos != alias) { hashMapRowGetters[pos] = mapJoinTables[pos].createGetter(refKey); + if (isFullOuterMapJoin) { + matchTracker = + ((ReusableGetAdaptorDirectAccess) hashMapRowGetters[pos]).createMatchTracker(); + } } } } @@ -463,7 +566,26 @@ public void process(Object row, int tag) throws HiveException { ReusableGetAdaptor adaptor; if (firstSetKey == null) { adaptor = firstSetKey = hashMapRowGetters[pos]; - joinResult = setMapJoinKey(firstSetKey, row, alias); + if (!isFullOuterMapJoin) { + // Normal case. + joinResult = setMapJoinKey(firstSetKey, row, alias); + } else if (!isFullOuterIntersect) { + // FULL OUTER MapJoin. We do not want keys with any NULLs to get tracked. + joinResult = setMapJoinKeyNoNulls(firstSetKey, row, alias, matchTracker); + if (isFullOuterForwardKeysToIntersect && + joinResult == JoinUtil.JoinResult.MATCH && + matchTracker.getIsFirstMatch()) { + forwardFirstTimeMatchToFullOuterIntersect(row, outputObjInspector); + } + } else { + // FULL OUTER MapJoin Intersect. Lookup the key and don't collect Small Table results. + if (setMapJoinKeyNoResult(firstSetKey, row, alias, matchTracker)) { + // Intersect match has now been tracked. + return; + } + spillBigTableRow(mapJoinTables[pos], row); + return; + } } else { // Keys for all tables are the same, so only the first has to deserialize them. 
adaptor = hashMapRowGetters[pos]; @@ -544,8 +666,149 @@ protected void spillBigTableRow(MapJoinTableContainer hybridHtContainer, Object bigTable.add(row); } + /* + * For FULL OUTER MapJoin, create a key match tracker on the Small Table. + */ + private void createMatchTracker(MapJoinTableContainer smallTable) { + ReusableGetAdaptor hashMapRowGetter = smallTable.createGetter(null); + matchTracker = + ((ReusableGetAdaptorDirectAccess) hashMapRowGetter).createMatchTracker(); + Preconditions.checkState(matchTracker != null); + } + + private byte findSmallTable() { + byte smallTablePos = -1; + for (byte pos = 0; pos < mapJoinTables.length; pos++) { + if (pos != conf.getPosBigTable()) { + smallTablePos = pos; + break; + } + } + Preconditions.checkState(smallTablePos != -1); + return smallTablePos; + } + + /* + * For FULL OUTER MapJoin, find the non-matched Small Table keys and values and add them to the + * join output result. + * + * When a Shared-Memory (i.e. instead of a Dynamic Partition Hash Join) is planned, we do the + * generation in the INTERSECT Reducer. + */ + protected void generateFullOuterSmallTableNoMatches(byte smallTablePos, + MapJoinTableContainer substituteSmallTable) throws HiveException { + + // FUTURE: Currently, in the MapJoinOperator, we only support FULL OUTER MapJoin for + // FUTURE MapJoinBytesTableContainer. NOTE: Vectorization code will override this method. + + if (!conf.isDynamicPartitionHashJoin() && !conf.isFullOuterIntersect()) { + + // The FULL OUTER MapJoin INTERSECT operator does the non-match Small Table + // result work. + return; + } + + if (matchTracker == null) { + + // When the process method isn't called (i.e. no rows), we need to create the + // MatchTracker here. + // + // It will indicate no matches, of course. + // + createMatchTracker(substituteSmallTable); + } + + boolean isSmallTableValuesOnly = false; + int[] smallTableValuesIndex = conf.getValueIndex(smallTablePos); + if (smallTableValuesIndex == null) { + List<Integer> valuesList = conf.getRetainList().get(smallTablePos); + smallTableValuesIndex = + ArrayUtils.toPrimitive(valuesList.toArray(new Integer[0])); + isSmallTableValuesOnly = true; + } + final int smallTableValuesIndexSize = smallTableValuesIndex.length; + + // Our first output column for Small Table results is based on order. (The Big Table columns + // will all be NULL.) + final int firstOutputColumnNum = (posBigTable == (byte) 0 ? fullOuterBigTableRetainSize : 0); + + /* + * Create iterator that produces each non-matched Small Table key and a ReusableRowContainer + * with the Small Table values. + */ + NonMatchedSmallTableIterator nonMatchedIterator = + substituteSmallTable.createNonMatchedSmallTableIterator(matchTracker); + int nonMatchedKeyCount = 0; + int nonMatchedValueCount = 0; + while (nonMatchedIterator.isNext()) { + List<Object> keyObjList = nonMatchedIterator.getCurrentKey(); + + MapJoinRowContainer values = nonMatchedIterator.getCurrentRows(); + AbstractRowContainer.RowIterator<List<Object>> iter = values.rowIter(); + for (List<Object> valueObjList = iter.first(); + valueObjList != null; + valueObjList = iter.next()) { + + // Form non-matched Small Table join result. We only fill in the Small Table columns, + // so the Big Table retained columns are NULLs from the new allocation.
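+          // (Assumed layout: [Big Table retained columns, left NULL][Small Table key/value columns], with the Small Table area starting at firstOutputColumnNum.)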
+ + Object[] row = new Object[fullOuterBigTableRetainSize + smallTableValuesIndexSize]; + int outputColumnNum = firstOutputColumnNum; + + if (isSmallTableValuesOnly) { + for (int i = 0; i < smallTableValuesIndexSize; i++) { + row[outputColumnNum++] = valueObjList.get(smallTableValuesIndex[i]); + } + } else { + for (int i = 0; i < smallTableValuesIndexSize; i++) { + final int index = smallTableValuesIndex[i]; + + if (index >= 0) { + + // Non-negative indexes indicate a Big Table key column is needed for the + // Small Table result "area". + + row[outputColumnNum++] = keyObjList.get(index); + } else { + + // Negative indexes indicate a column to be read (deserialized) from the Small Table's + // LazyBinary value row. + + int smallTableValueIndex = -index - 1; + + row[outputColumnNum++] = valueObjList.get(smallTableValueIndex); + } + } + } + + // UNDONE: Do we need to copy the objects? + Object standardCopyRow = + ObjectInspectorUtils.copyToStandardObject( + row, outputObjInspector, ObjectInspectorCopyOption.WRITABLE); + + internalForward(standardCopyRow, outputObjInspector); + nonMatchedValueCount++; + } + + nonMatchedKeyCount++; + } + } + @Override public void closeOp(boolean abort) throws HiveException { + + if (isFullOuterMapJoin) { + + // FULL OUTER MapJoin: After matching the Big Table row keys against the Small Table, we now + // add any non-matched Small Table keys and values to the join output result. + + // FUTURE: Currently, we only support FULL OUTER MapJoin for single condition MapJoins. + byte smallTablePos = findSmallTable(); + generateFullOuterSmallTableNoMatches( + smallTablePos, + (MapJoinTableContainer) mapJoinTables[smallTablePos]); + } + boolean spilled = false; for (MapJoinTableContainer container : mapJoinTables) { if (container != null) { @@ -590,6 +853,9 @@ public void closeOp(boolean abort) throws HiveException { hybridHtContainer.getTotalInMemRowCount() - hashPartitions[i].getHashMapFromMemory().getNumValues()); hashPartitions[i].getHashMapFromMemory().clear(); + if (matchTracker != null) { + matchTracker.clearPartition(i); + } } } assert hybridHtContainer.getTotalInMemRowCount() == 0; @@ -662,13 +928,28 @@ private void clearAllTableContainers() { */ private void continueProcess(int partitionId) throws HiveException, IOException, SerDeException, ClassNotFoundException { + byte smallTablePos = -1; for (byte pos = 0; pos < mapJoinTables.length; pos++) { if (pos != conf.getPosBigTable()) { LOG.info("Going to reload hash partition " + partitionId); reloadHashTable(pos, partitionId); + smallTablePos = pos; } } + reProcessBigTable(partitionId); + + if (isFullOuterMapJoin) { + + // FULL OUTER MapJoin: Just like at the beginning of closeOp -- for the current Hybrid Grace + // partition -- after matching the Big Table row keys against the Small Table, we now + // add any non-matched Small Table keys and values to the join output result. + + // Use the spill MapJoinBytesTableContainer as the substitute.
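The sign convention in smallTableValuesIndex, used in the fill loop above, packs two column sources into one int array. A minimal decoder sketch under that convention (keyObjList and valueObjList as in the loop):

static Object decodeSmallTableColumn(int index, List<Object> keyObjList, List<Object> valueObjList) {
  if (index >= 0) {
    // Non-negative: the output column is sourced from the join key.
    return keyObjList.get(index);
  }
  // Negative: -index - 1 addresses the deserialized LazyBinary value row,
  // so -1 maps to value column 0, -2 to value column 1, and so on.
  return valueObjList.get(-index - 1);
}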
+ generateFullOuterSmallTableNoMatches( + smallTablePos, + spilledMapJoinTables[smallTablePos]); + } } /** @@ -729,11 +1010,24 @@ protected void reloadHashTable(byte pos, int partitionId) + restoredHashMap.getNumValues()); kvContainer.clear(); - spilledMapJoinTables[pos] = new MapJoinBytesTableContainer(restoredHashMap); - spilledMapJoinTables[pos].setInternalValueOi(container.getInternalValueOi()); - spilledMapJoinTables[pos].setSortableSortOrders(container.getSortableSortOrders()); - spilledMapJoinTables[pos].setNullMarkers(container.getNullMarkers()); - spilledMapJoinTables[pos].setNotNullMarkers(container.getNotNullMarkers()); + MapJoinBytesTableContainer spilledMapJoinTable = + new MapJoinBytesTableContainer(restoredHashMap); + + spilledMapJoinTable.setSerde(container.getKeyContext(), container.getValueContext()); + spilledMapJoinTable.setInternalValueOi(container.getInternalValueOi()); + spilledMapJoinTable.setSortableSortOrders(container.getSortableSortOrders()); + spilledMapJoinTable.setNullMarkers(container.getNullMarkers()); + spilledMapJoinTable.setNotNullMarkers(container.getNotNullMarkers()); + + if (isFullOuterMapJoin) { + + // FULL OUTER MapJoin: Since we are switching to a single MapJoinBytesTableContainer, we + // don't need the partitioned MatchTracker anymore. Recreate a regular one for our new + // join table. + createMatchTracker(spilledMapJoinTable); + } + + spilledMapJoinTables[pos] = spilledMapJoinTable; } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java index c28ef99..b66c718 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java @@ -242,6 +242,7 @@ public RowSchema getSchema() { // for output rows of this operator protected transient ObjectInspector outputObjInspector; + protected transient int auxiliaryChildIndex = -1; public void setId(String id) { this.id = id; @@ -914,51 +915,21 @@ protected long getNextCntr(long cntr) { protected void forward(Object row, ObjectInspector rowInspector) throws HiveException { - forward(row, rowInspector, false); - } - - protected void forward(VectorizedRowBatch vrg, ObjectInspector rowInspector) - throws HiveException { - forward(vrg, rowInspector, true); - } - - protected void forward(Object row, ObjectInspector rowInspector, boolean isVectorized) - throws HiveException { - if (isVectorized) { - vectorForward((VectorizedRowBatch) row, rowInspector); - } else { - baseForward(row, rowInspector); - } - } - - private void vectorForward(VectorizedRowBatch vrg, ObjectInspector rowInspector) - throws HiveException { - this.runTimeNumRows += vrg.count(); + runTimeNumRows++; if (getDone()) { return; } - // Data structures to store original values - final int size = vrg.size; - final boolean selectedInUse = vrg.selectedInUse; - final boolean saveState = (selectedInUse && multiChildren); - if (saveState) { - System.arraycopy(vrg.selected, 0, selected, 0, size); - } - int childrenDone = 0; for (int i = 0; i < childOperatorsArray.length; i++) { + if (i == auxiliaryChildIndex) { + continue; + } Operator o = childOperatorsArray[i]; if (o.getDone()) { childrenDone++; } else { - o.process(vrg, childOperatorsTag[i]); - // Restore original values - vrg.size = size; - vrg.selectedInUse = selectedInUse; - if (saveState) { - System.arraycopy(selected, 0, vrg.selected, 0, size); - } + o.process(row, childOperatorsTag[i]); } } @@ -968,27 +939,79 @@ private void vectorForward(VectorizedRowBatch vrg, 
ObjectInspector rowInspector) } } - private void baseForward(Object row, ObjectInspector rowInspector) + /* + * Forward a row to the auxiliary child operator. + */ + public void forwardAuxiliary(Object row, ObjectInspector rowInspector) throws HiveException { + + Operator auxiliaryChild = childOperatorsArray[auxiliaryChildIndex]; + if (auxiliaryChild.getDone()) { + return; + } + + auxiliaryChild.process(row, childOperatorsTag[auxiliaryChildIndex]); + } + + /* + * Forward a VectorizedRowBatch to the children operators. + */ + protected void vectorForward(VectorizedRowBatch batch) throws HiveException { - this.runTimeNumRows++; + + runTimeNumRows++; if (getDone()) { return; } - int childrenDone = 0; - for (int i = 0; i < childOperatorsArray.length; i++) { - Operator o = childOperatorsArray[i]; - if (o.getDone()) { - childrenDone++; - } else { - o.process(row, childOperatorsTag[i]); + // Data structures to store original values + final int size = batch.size; + final boolean selectedInUse = batch.selectedInUse; + final boolean saveState = (selectedInUse && multiChildren); + if (saveState) { + System.arraycopy(batch.selected, 0, selected, 0, size); + } + + final int childSize = childOperatorsArray.length; + if (childSize == 1) { + childOperatorsArray[0].process(batch, childOperatorsTag[0]); + } else { + int childrenDone = 0; + for (int i = 0; i < childOperatorsArray.length; i++) { + if (i == auxiliaryChildIndex) { + continue; + } + Operator o = childOperatorsArray[i]; + if (o.getDone()) { + childrenDone++; + } else { + o.process(batch, childOperatorsTag[i]); + + // Restore original values + batch.size = size; + batch.selectedInUse = selectedInUse; + if (saveState) { + System.arraycopy(selected, 0, batch.selected, 0, size); + } + } + } + // if all children are done, this operator is also done + if (childrenDone != 0 && childrenDone == childOperatorsArray.length) { + setDone(true); } } + } - // if all children are done, this operator is also done - if (childrenDone != 0 && childrenDone == childOperatorsArray.length) { - setDone(true); + /* + * Forward a VectorizedRowBatch to the auxiliary child operator. 
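The save/restore choreography in vectorForward exists because a child operator may shrink or reorder batch.selected and batch.size in place (a downstream filter, for instance); every sibling must observe the original selection. A standalone sketch of the invariant, assuming the usual VectorizedRowBatch fields:

// Capture the selection state before handing the batch to a child.
final int size = batch.size;
final boolean selectedInUse = batch.selectedInUse;
final int[] saved = java.util.Arrays.copyOf(batch.selected, size);

child.process(batch, tag); // the child may mutate the selection in place

// Restore so the next sibling sees the original rows.
batch.size = size;
batch.selectedInUse = selectedInUse;
System.arraycopy(saved, 0, batch.selected, 0, size);

The single-child fast path above skips all of this, since no sibling is left to observe the mutation.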
+ */ + public void vectorForwardAuxiliary(VectorizedRowBatch batch) throws HiveException { + + Operator auxiliaryChild = childOperatorsArray[auxiliaryChildIndex]; + if (auxiliaryChild.getDone()) { + return; } + + auxiliaryChild.process(batch, childOperatorsTag[auxiliaryChildIndex]); } public void reset(){ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java index 0799181..ca04467 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java @@ -122,7 +122,11 @@ public void process(Object row, int tag) throws HiveException { if (conf != null && conf.isGatherStats()) { gatherStats(row); } - forward(row, inputObjInspectors[tag], vectorized); + if (vectorized) { + vectorForward((VectorizedRowBatch) row); + } else { + forward(row, inputObjInspectors[tag]); + } } private boolean checkSetDone(Object row, int tag) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java index add8bda..4f3e029 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java @@ -466,16 +466,18 @@ public void put(KvSource kv, int keyHashCode) throws SerDeException { * @param key Key buffer. * @param offset the offset to the key in the buffer * @param hashMapResult The object to fill in that can read the values. + * @param matchTracker Optional object for tracking key matches. * @return The state byte. */ - public byte getValueResult(byte[] key, int offset, int length, Result hashMapResult) { + public byte getValueResult(byte[] key, int offset, int length, Result hashMapResult, + MatchTracker matchTracker) { hashMapResult.forget(); WriteBuffers.Position readPos = hashMapResult.getReadPos(); // First, find first record for the key. - long ref = findKeyRefToRead(key, offset, length, readPos); + long ref = findKeyRefToRead(key, offset, length, readPos, matchTracker); if (ref == 0) { return 0; } @@ -490,6 +492,16 @@ public byte getValueResult(byte[] key, int offset, int length, Result hashMapRes return Ref.getStateByte(ref); } + /* + * This variation is used by FULL OUTER INTERSECT MapJoin. It does key match tracking for + * intersect purposes but does not return Small Table values. + */ + public void lookupKeyNoResult(byte[] key, int offset, int length, WriteBuffers.Position readPos, + MatchTracker matchTracker) { + + findKeyRefToRead(key, offset, length, readPos, matchTracker); + } + /** * Take the segment reference from {@link #getValueRefs(byte[], int, List)} * result and makes it self-contained - adds byte array where the value is stored, and @@ -500,6 +512,54 @@ public void populateValue(WriteBuffers.ByteSegmentRef valueRef) { } /** + * Finds the next non-matched Small Table key and value. Supports FULL OUTER MapJoin. + * + * @param currentSlotNum Start by specifying -1; on subsequent calls, pass the index returned by the previous call. + * @param keyRef If the return value is not -1, a reference to the key bytes. + * @param hashMapResult If the return value is not -1, the key's values. + * @param matchTracker The object that tracks matches (non-shared). + * @return The current index of the non-matched key; or -1 if no more. 
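The cursor protocol for findNextNonMatched is self-feeding: the returned slot index is handed back on the next call. A sketch of the intended pump loop (the NonMatchedSmallTableIterator implementations later in this patch follow exactly this shape):

WriteBuffers.ByteSegmentRef keyRef = new WriteBuffers.ByteSegmentRef();
BytesBytesMultiHashMap.Result result = new BytesBytesMultiHashMap.Result();
int slot = -1;
while ((slot = hashMap.findNextNonMatched(slot, keyRef, result, matchTracker)) != -1) {
  // keyRef now references the serialized key bytes; result reads the value rows.
}

Tracking by slot index (rather than by key bytes) is sound because trackMatch fires only after isSameKey confirms the probe key occupies that slot, and slots no longer move once the table is sealed.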
+ */ + public int findNextNonMatched(int currentSlotNum, WriteBuffers.ByteSegmentRef keyRef, + Result hashMapResult, MatchTracker matchTracker) { + currentSlotNum++; + + hashMapResult.forget(); + + WriteBuffers.Position readPos = hashMapResult.getReadPos(); + + while (true) { + if (currentSlotNum >= refs.length) { + + // No more. + return -1; + } + long ref = refs[currentSlotNum]; + if (ref != 0 && !matchTracker.wasMatched(currentSlotNum)) { + + // An unmatched key. + writeBuffers.setReadPoint(getFirstRecordLengthsOffset(ref, readPos), readPos); + int valueLength = (int) writeBuffers.readVLong(readPos); + int keyLength = (int) writeBuffers.readVLong(readPos); + long keyOffset = Ref.getOffset(ref) - (valueLength + keyLength); + + keyRef.reset(keyOffset, keyLength); + if (keyLength > 0) { + writeBuffers.populateValue(keyRef); + } + + boolean hasList = Ref.hasList(ref); + long offsetAfterListRecordKeyLen = hasList ? writeBuffers.getReadPoint(readPos) : 0; + + hashMapResult.set(this, Ref.getOffset(ref), hasList, offsetAfterListRecordKeyLen); + + return currentSlotNum; + } + currentSlotNum++; + } + } + + /** * Number of keys in the hashmap * @return number of keys */ @@ -516,8 +576,12 @@ public int getNumValues() { return numValues; } + public int getNumHashBuckets() { + return refs.length; + } + /** - * Number of bytes used by the hashmap + * Number of bytes used by the hashmap. * There are two main components that take most memory: writeBuffers and refs * Others include instance fields: 100 * @return number of bytes @@ -614,7 +678,7 @@ private int findKeySlotToWrite(long keyOffset, int keyLength, int hashCode) { * @return The ref to use for reading. */ private long findKeyRefToRead(byte[] key, int offset, int length, - WriteBuffers.Position readPos) { + WriteBuffers.Position readPos, MatchTracker matchTracker) { final int bucketMask = (refs.length - 1); int hashCode = writeBuffers.hashCode(key, offset, length); int slot = hashCode & bucketMask; @@ -629,6 +693,13 @@ private long findKeyRefToRead(byte[] key, int offset, int length, return 0; } if (isSameKey(key, offset, length, ref, hashCode, readPos)) { + + if (matchTracker != null) { + + // Support for FULL OUTER MapJoin. Track matches of the hash table slot entry. 
+ matchTracker.trackMatch(slot); + } + return ref; } ++metricGetConflict; @@ -897,7 +968,7 @@ public void debugDumpTable() { dump.append(Utils.toStringBinary(key, 0, key.length)).append(" ref [").append(dumpRef(ref)) .append("]: "); Result hashMapResult = new Result(); - getValueResult(key, 0, key.length, hashMapResult); + getValueResult(key, 0, key.length, hashMapResult, null); List results = new ArrayList(); WriteBuffers.ByteSegmentRef byteSegmentRef = hashMapResult.first(); while (byteSegmentRef != null) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java index 9d35805..f407c87 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapper; import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapperBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; @@ -113,6 +114,7 @@ public void put(MapJoinKey key, MapJoinRowContainer value) { public int size() { return mHash.size(); } + @Override public Set entrySet() { return mHash.entrySet(); @@ -141,6 +143,12 @@ public ReusableGetAdaptor createGetter(MapJoinKey keyTypeFromLoader) { } @Override + public NonMatchedSmallTableIterator createNonMatchedSmallTableIterator( + MatchTracker matchTracker) { + throw new RuntimeException("Not applicable"); + } + + @Override + public long getEstimatedMemorySize() { // TODO: Key and Values are Object[] which can be eagerly deserialized or lazily deserialized. 
To accurately // estimate the entry size, every possible Objects in Key, Value should implement MemoryEstimate interface which @@ -188,6 +196,22 @@ public GetAdaptor(MapJoinKey key) { } @Override + public JoinUtil.JoinResult setFromVectorNoNulls(VectorHashKeyWrapper kw, + VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, + MatchTracker matchTracker) + throws HiveException { + throw new RuntimeException("Not supported"); + } + + @Override + public boolean setFromVectorNoResult(VectorHashKeyWrapper kw, + VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, + MatchTracker matchTracker) + throws HiveException { + throw new RuntimeException("Not supported"); + } + + @Override public JoinUtil.JoinResult setFromRow(Object row, List fields, List ois) throws HiveException { if (currentKey == null) { @@ -208,6 +232,18 @@ public GetAdaptor(MapJoinKey key) { } @Override + public JoinUtil.JoinResult setFromRowNoNulls(Object row, List fields, + List ois, MatchTracker matchTracker) throws HiveException { + throw new RuntimeException("Not supported"); + } + + @Override + public boolean setFromRowNoResult(Object row, List fields, + List ois, MatchTracker matchTracker) throws HiveException { + throw new RuntimeException("Not supported"); + } + + @Override public JoinUtil.JoinResult setFromOther(ReusableGetAdaptor other) { assert other instanceof GetAdaptor; GetAdaptor other2 = (GetAdaptor)other; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java index 027e39a..64d90b4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer.KeyValueHelper; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapper; import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapperBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; @@ -50,6 +51,7 @@ import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe; import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryFactory; @@ -63,6 +65,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.base.Preconditions; + import com.esotericsoftware.kryo.Kryo; /** @@ -95,6 +99,9 @@ /** The OI used to deserialize values. We never deserialize keys. 
*/ private LazyBinaryStructObjectInspector internalValueOi; + private MapJoinObjectSerDeContext keyContext; + private MapJoinObjectSerDeContext valueContext; + private AbstractSerDe keySerde; private boolean[] sortableSortOrders; private byte[] nullMarkers; private byte[] notNullMarkers; @@ -776,6 +783,12 @@ public ReusableGetAdaptor createGetter(MapJoinKey keyTypeFromLoader) { } @Override + public NonMatchedSmallTableIterator createNonMatchedSmallTableIterator( + MatchTracker matchTracker) { + return new NonMatchedSmallTableIteratorImpl(matchTracker); + } + + @Override public void seal() { for (HashPartition hp : hashPartitions) { // Only seal those partitions that haven't been spilled and cleared, @@ -834,6 +847,72 @@ public GetAdaptor() { sortableSortOrders, nullMarkers, notNullMarkers)); } + /* + * This variation is for FULL OUTER MapJoin. It does key match tracking only if the key has + * no NULLs. + */ + @Override + public JoinUtil.JoinResult setFromVectorNoNulls(VectorHashKeyWrapper kw, + VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, + MatchTracker matchTracker) + throws HiveException { + if (nulls == null) { + nulls = new boolean[keyOutputWriters.length]; + currentKey = new Object[keyOutputWriters.length]; + vectorKeyOIs = new ArrayList(); + for (int i = 0; i < keyOutputWriters.length; i++) { + vectorKeyOIs.add(keyOutputWriters[i].getObjectInspector()); + } + } else { + assert nulls.length == keyOutputWriters.length; + } + boolean hasNulls = false; + for (int i = 0; i < keyOutputWriters.length; i++) { + currentKey[i] = keyWrapperBatch.getWritableKeyValue(kw, i, keyOutputWriters[i]); + if (currentKey[i] == null) { + nulls[i] = true; + hasNulls = true; + } else { + nulls[i] = false; + } + } + if (hasNulls) { + currentValue.reset(); + return JoinUtil.JoinResult.NOMATCH; + } + return currentValue.setFromOutput( + MapJoinKey.serializeRow(output, currentKey, vectorKeyOIs, + sortableSortOrders, nullMarkers, notNullMarkers), matchTracker); + } + + /* + * This variation is used by FULL OUTER INTERSECT MapJoin. It does key match tracking for + * intersect purposes but does not return Small Table values. + */ + @Override + public boolean setFromVectorNoResult(VectorHashKeyWrapper kw, + VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, + MatchTracker matchTracker) + throws HiveException { + if (nulls == null) { + nulls = new boolean[keyOutputWriters.length]; + currentKey = new Object[keyOutputWriters.length]; + vectorKeyOIs = new ArrayList(); + for (int i = 0; i < keyOutputWriters.length; i++) { + vectorKeyOIs.add(keyOutputWriters[i].getObjectInspector()); + } + } else { + assert nulls.length == keyOutputWriters.length; + } + for (int i = 0; i < keyOutputWriters.length; i++) { + currentKey[i] = keyWrapperBatch.getWritableKeyValue(kw, i, keyOutputWriters[i]); + nulls[i] = currentKey[i] == null; + } + return currentValue.setFromOutputNoResult( + MapJoinKey.serializeRow(output, currentKey, vectorKeyOIs, + sortableSortOrders, nullMarkers, notNullMarkers), matchTracker); + } + @Override public JoinUtil.JoinResult setFromRow(Object row, List fields, List ois) throws HiveException { @@ -850,6 +929,56 @@ public GetAdaptor() { sortableSortOrders, nullMarkers, notNullMarkers)); } + /* + * This variation is for FULL OUTER MapJoin. It does key match tracking only if the key has + * no NULLs. 
+ */ + @Override + public JoinUtil.JoinResult setFromRowNoNulls(Object row, List fields, + List ois, MatchTracker matchTracker) throws HiveException { + if (nulls == null) { + nulls = new boolean[fields.size()]; + currentKey = new Object[fields.size()]; + } + boolean hasNulls = false; + for (int keyIndex = 0; keyIndex < fields.size(); ++keyIndex) { + currentKey[keyIndex] = fields.get(keyIndex).evaluate(row); + if (currentKey[keyIndex] == null) { + nulls[keyIndex] = true; + hasNulls = true; + } else { + nulls[keyIndex] = false; + } + } + if (hasNulls) { + currentValue.reset(); + return JoinUtil.JoinResult.NOMATCH; + } + return currentValue.setFromOutput( + MapJoinKey.serializeRow(output, currentKey, ois, + sortableSortOrders, nullMarkers, notNullMarkers), matchTracker); + } + + /* + * This variation is used by FULL OUTER INTERSECT MapJoin. It does key match tracking for + * intersect purposes but does not return Small Table values. + */ + @Override + public boolean setFromRowNoResult(Object row, List fields, + List ois, MatchTracker matchTracker) throws HiveException { + if (nulls == null) { + nulls = new boolean[fields.size()]; + currentKey = new Object[fields.size()]; + } + for (int keyIndex = 0; keyIndex < fields.size(); ++keyIndex) { + currentKey[keyIndex] = fields.get(keyIndex).evaluate(row); + nulls[keyIndex] = currentKey[keyIndex] == null; + } + return currentValue.setFromOutputNoResult( + MapJoinKey.serializeRow(output, currentKey, ois, + sortableSortOrders, nullMarkers, notNullMarkers), matchTracker); + } + @Override public JoinUtil.JoinResult setFromOther(ReusableGetAdaptor other) throws HiveException { assert other instanceof GetAdaptor; @@ -884,8 +1013,35 @@ public MapJoinRowContainer getCurrentRows() { @Override public JoinUtil.JoinResult setDirect(byte[] bytes, int offset, int length, - BytesBytesMultiHashMap.Result hashMapResult) { - return currentValue.setDirect(bytes, offset, length, hashMapResult); + BytesBytesMultiHashMap.Result hashMapResult, MatchTracker matchTracker) { + return currentValue.setDirect( + bytes, offset, length, hashMapResult, matchTracker); + } + + /* + * This variation is used by FULL OUTER INTERSECT MapJoin. It does key match tracking for + * intersect purposes but does not return Small Table values. 
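The boolean returned by the NoResult variants is easy to misread: it does not mean "matched". As implemented here and in setFromOutputNoResult below, true means the probe is complete (the key was tracked, or the bloom filter proves it absent), while false means the row hashes to an on-disk partition and the caller must spill it for the reload pass:

if (!adaptor.setFromRowNoResult(row, joinKeys[alias], joinKeysObjectInspectors[alias], matchTracker)) {
  spillBigTableRow(mapJoinTables[pos], row); // retried when the partition is reloaded
}

This mirrors the caller in MapJoinOperator.process earlier in the patch.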
+ */ + @Override + public boolean setDirectNoResult(byte[] bytes, int offset, int length, + WriteBuffers.Position readPos, MatchTracker matchTracker) { + return currentValue.setDirectNoResult(bytes, offset, length, readPos, matchTracker); + } + + @Override + public MatchTracker createMatchTracker() { + final int partitionCount = hashPartitions.length; + MatchTracker matchTracker = MatchTracker.createPartitioned(partitionCount); + for (int partitionId = 0; partitionId < partitionCount; partitionId++) { + HashPartition hashPartition = hashPartitions[partitionId]; + if (!hashPartition.hashMapOnDisk) { + matchTracker.addPartition(partitionId, hashPartition.hashMap.getNumHashBuckets()); + } + } + return matchTracker; } @Override @@ -899,6 +1055,7 @@ public int directSpillPartitionId() { implements MapJoinRowContainer, AbstractRowContainer.RowIterator<List<Object>> { private byte aliasFilter; private final BytesBytesMultiHashMap.Result hashMapResult; + private final WriteBuffers.Position noResultReadPos; /** * Sometimes, when container is empty in multi-table mapjoin, we need to add a dummy row. @@ -932,9 +1089,14 @@ public ReusableRowContainer() { } uselessIndirection = new ByteArrayRef(); hashMapResult = new BytesBytesMultiHashMap.Result(); + noResultReadPos = new WriteBuffers.Position(); clearRows(); } + public BytesBytesMultiHashMap.Result getHashMapResult() { + return hashMapResult; + } + /* Determine if there is a match between big table row and the corresponding hashtable * Three states can be returned: * MATCH: a match is found @@ -963,10 +1125,9 @@ public ReusableRowContainer() { toSpillPartitionId = partitionId; hashMapResult.forget(); return JoinUtil.JoinResult.SPILL; - } - else { + } else { aliasFilter = hashPartitions[partitionId].hashMap.getValueResult(output.getData(), 0, - output.getLength(), hashMapResult); + output.getLength(), hashMapResult, /* matchTracker */ null); dummyRow = null; if (hashMapResult.hasRows()) { return JoinUtil.JoinResult.MATCH; @@ -977,6 +1138,81 @@ public ReusableRowContainer() { } } + public JoinUtil.JoinResult setFromOutput(Output output, MatchTracker matchTracker) { + int keyHash = HashCodeUtil.murmurHash(output.getData(), 0, output.getLength()); + + if (bloom1 != null && !bloom1.testLong(keyHash)) { + /* + * if the keyHash is missing in the bloom filter, then the value cannot + * exist in any of the spilled partition - return NOMATCH + */ + dummyRow = null; + aliasFilter = (byte) 0xff; + hashMapResult.forget(); + return JoinResult.NOMATCH; + } + + partitionId = keyHash & (hashPartitions.length - 1); + + // If the target hash table is on disk, spill this row to disk as well to be processed later + if (isOnDisk(partitionId)) { + toSpillPartitionId = partitionId; + hashMapResult.forget(); + return JoinUtil.JoinResult.SPILL; + } else { + MatchTracker partitionMatchTracker = + (matchTracker == null ? null : matchTracker.getPartition(partitionId)); + aliasFilter = hashPartitions[partitionId].hashMap.getValueResult( + output.getData(), 0, output.getLength(), + hashMapResult, + partitionMatchTracker); + dummyRow = null; + if (hashMapResult.hasRows()) { + return JoinUtil.JoinResult.MATCH; + } else { + aliasFilter = (byte) 0xff; + return JoinUtil.JoinResult.NOMATCH; + } + } + } + + /* + * This variation is used by FULL OUTER INTERSECT MapJoin. 
It does key match tracking for + * intersect purposes but does not return Small Table values. + */ + public boolean setFromOutputNoResult(Output output, MatchTracker matchTracker) { + + int keyHash = HashCodeUtil.murmurHash(output.getData(), 0, output.getLength()); + + if (bloom1 != null && !bloom1.testLong(keyHash)) { + /* + * if the keyHash is missing in the bloom filter, then the value cannot + * exist in any of the spilled partition - return NOMATCH + */ + return true; + } + + partitionId = keyHash & (hashPartitions.length - 1); + + // If the target hash table is on disk, spill this row to disk as well to be processed later + if (isOnDisk(partitionId)) { + toSpillPartitionId = partitionId; + return false; + } else { + MatchTracker partitionMatchTracker = + (matchTracker == null ? null : matchTracker.getPartition(partitionId)); + hashPartitions[partitionId].hashMap.lookupKeyNoResult( + output.getData(), 0, output.getLength(), + noResultReadPos, + partitionMatchTracker); + return true; + } + } + + public void reset() { + hashMapResult.forget(); + } + @Override public boolean hasRows() { return hashMapResult.hasRows() || (dummyRow != null); @@ -1094,7 +1330,7 @@ public void write(MapJoinObjectSerDeContext valueContext, ObjectOutputStream out // Direct access. public JoinUtil.JoinResult setDirect(byte[] bytes, int offset, int length, - BytesBytesMultiHashMap.Result hashMapResult) { + BytesBytesMultiHashMap.Result hashMapResult, MatchTracker matchTracker) { int keyHash = HashCodeUtil.murmurHash(bytes, offset, length); partitionId = keyHash & (hashPartitions.length - 1); @@ -1115,8 +1351,12 @@ public void write(MapJoinObjectSerDeContext valueContext, ObjectOutputStream out return JoinUtil.JoinResult.SPILL; } else { - aliasFilter = hashPartitions[partitionId].hashMap.getValueResult(bytes, offset, length, - hashMapResult); + MatchTracker partitionMatchTracker = + (matchTracker == null ? null : matchTracker.getPartition(partitionId)); + aliasFilter = hashPartitions[partitionId].hashMap.getValueResult( + bytes, offset, length, + hashMapResult, + partitionMatchTracker); dummyRow = null; if (hashMapResult.hasRows()) { return JoinUtil.JoinResult.MATCH; @@ -1127,11 +1367,148 @@ public void write(MapJoinObjectSerDeContext valueContext, ObjectOutputStream out } } + /* + * This variation is used by FULL OUTER INTERSECT MapJoin. It does key match tracking for + * intersect purposes but does not return Small Table values. + */ + public boolean setDirectNoResult(byte[] bytes, int offset, int length, + WriteBuffers.Position readPos, MatchTracker matchTracker) { + + int keyHash = HashCodeUtil.murmurHash(bytes, offset, length); + partitionId = keyHash & (hashPartitions.length - 1); + + if (bloom1 != null && !bloom1.testLong(keyHash)) { + /* + * if the keyHash is missing in the bloom filter, then the value cannot exist in any of the + * spilled partition - return NOMATCH + */ + return true; + } + + // If the target hash table is on disk, spill this row to disk as well to be processed later + if (isOnDisk(partitionId)) { + return false; + } + else { + MatchTracker partitionMatchTracker = + (matchTracker == null ? 
null : matchTracker.getPartition(partitionId)); + hashPartitions[partitionId].hashMap.lookupKeyNoResult( + bytes, offset, length, + readPos, + partitionMatchTracker); + return true; + } + } + public int directSpillPartitionId() { return partitionId; } } + /** + * For FULL OUTER MapJoin: Iterates through the hash tables of the currently loaded hash + * partitions and returns the key and value rows for any non-matched keys. + */ + private class NonMatchedSmallTableIteratorImpl implements NonMatchedSmallTableIterator { + + private final MatchTracker partitionedMatchTracker; + + private int currentPartitionId; + private MatchTracker currentMatchTracker; + private BytesBytesMultiHashMap currentHashMap; + private int currentIndex; + + private final WriteBuffers.ByteSegmentRef keyRef; + private final BytesWritable bytesWritable; + private final ReusableRowContainer currentValue; + + NonMatchedSmallTableIteratorImpl(MatchTracker matchTracker) { + Preconditions.checkState(matchTracker.getIsPartitioned()); + this.partitionedMatchTracker = matchTracker; + + Preconditions.checkState(keySerde != null); + + currentPartitionId = -1; + currentMatchTracker = null; + currentHashMap = null; + currentIndex = -1; + + keyRef = new WriteBuffers.ByteSegmentRef(); + bytesWritable = new BytesWritable(); + + currentValue = new ReusableRowContainer(); + } + + private boolean findNextPartitionId() { + while (true) { + if (++currentPartitionId >= hashPartitions.length) { + return false; + } + currentMatchTracker = partitionedMatchTracker.getPartition(currentPartitionId); + if (currentMatchTracker != null) { + currentHashMap = hashPartitions[currentPartitionId].hashMap; + if (currentHashMap != null) { + currentIndex = -1; + return true; + } + } + } + // throw new RuntimeException("Cannot get here"); + } + + @Override + public boolean isNext() { + + if (currentPartitionId == -1) { + if (!findNextPartitionId()) { + return false; + } + } + while (true) { + // If another non-matched key is found, the key bytes will be referenced by keyRef, and + // our ReusableRowContainer's BytesBytesMultiHashMap.Result will reference the value rows. 
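Pulling the partitioned pieces together, the Hybrid Grace MatchTracker lifecycle reduces to a few calls on the API this patch introduces; a condensed sketch from inside HybridHashTableContainer, where hashPartitions is visible (the partition ID 0 is illustrative):

// Build: one sub-tracker per memory-resident partition.
MatchTracker tracker = MatchTracker.createPartitioned(hashPartitions.length);
tracker.addPartition(0, hashPartitions[0].hashMap.getNumHashBuckets());

// Probe: lookups route through the partition's sub-tracker.
MatchTracker sub = tracker.getPartition(0); // null for on-disk partitions
// ... getValueResult(key, offset, length, result, sub) records slots via trackMatch ...

// Spill/clear: a partition leaving memory drops its flags.
tracker.clearPartition(0);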
+ currentIndex = + currentHashMap.findNextNonMatched( + currentIndex, keyRef, currentValue.getHashMapResult(), currentMatchTracker); + if (currentIndex != -1) { + return true; + } + if (!findNextPartitionId()) { + return false; + } + } + // throw new RuntimeException("Cannot get here"); + } + + @Override + public List getCurrentKey() throws HiveException { + List deserializedList = + MapJoinKey.deserializeRow( + keyRef.getBytes(), + (int) keyRef.getOffset(), + keyRef.getLength(), + bytesWritable, keySerde); + return deserializedList; + } + + @Override + public ByteSegmentRef getCurrentKeyAsRef() { + return keyRef; + } + + @Override + public MapJoinRowContainer getCurrentRows() { + return currentValue; + } + + @Override + public BytesBytesMultiHashMap.Result getHashMapResult() { + return currentValue.getHashMapResult(); + } + } @Override public void dumpMetrics() { for (int i = 0; i < hashPartitions.length; i++) { @@ -1168,10 +1545,23 @@ public int size() { return totalSize; } + public MapJoinObjectSerDeContext getKeyContext() { + return keyContext; + } + + public MapJoinObjectSerDeContext getValueContext() { + return valueContext; + } + @Override public void setSerde(MapJoinObjectSerDeContext keyCtx, MapJoinObjectSerDeContext valCtx) throws SerDeException { - AbstractSerDe keySerde = keyCtx.getSerDe(), valSerde = valCtx.getSerDe(); + + keyContext = keyCtx; + valueContext = valCtx; + + keySerde = keyCtx.getSerDe(); + AbstractSerDe valSerde = valCtx.getSerDe(); if (writeHelper == null) { LOG.info("Initializing container with " + keySerde.getClass().getName() + " and " diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java index 033bbdb..8fada3c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapper; import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapperBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; @@ -40,6 +41,7 @@ import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; @@ -65,6 +67,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.base.Preconditions; + /** * Table container that serializes keys and values using LazyBinarySerDe into * BytesBytesMultiHashMap, with very low memory overhead. However, @@ -88,6 +92,7 @@ * compare the large table keys correctly when we do, we need to serialize them with correct * ordering. Hence, remember the ordering here; it is null if we do use LazyBinarySerDe. 
*/ + private AbstractSerDe keySerde; private boolean[] sortableSortOrders; private byte[] nullMarkers; private byte[] notNullMarkers; @@ -336,9 +341,17 @@ public void setKeyValue(Writable key, Writable val) { @Override public byte updateStateByte(Byte previousValue) { - if (filterGetter == null) return (byte)0xff; + if (!hasTag || filterGetter == null) { + return (byte) 0xff; + } byte aliasFilter = (previousValue == null) ? (byte)0xff : previousValue.byteValue(); - filterGetter.init((BinaryComparable)value); + BinaryComparable binaryComparableValue = (BinaryComparable) value; + if (binaryComparableValue.getLength() == 0) { + + // Skip empty values just like MapJoinEagerRowContainer.read does. + return (byte) 0xff; + } + filterGetter.init(binaryComparableValue); aliasFilter &= filterGetter.getShort(); return aliasFilter; } @@ -407,7 +420,8 @@ public long getEstimatedMemorySize() { @Override public void setSerde(MapJoinObjectSerDeContext keyContext, MapJoinObjectSerDeContext valueContext) throws SerDeException { - AbstractSerDe keySerde = keyContext.getSerDe(), valSerde = valueContext.getSerDe(); + keySerde = keyContext.getSerDe(); + AbstractSerDe valSerde = valueContext.getSerDe(); if (writeHelper == null) { LOG.info("Initializing container with " + keySerde.getClass().getName() + " and " + valSerde.getClass().getName()); @@ -456,6 +470,12 @@ public ReusableGetAdaptor createGetter(MapJoinKey keyTypeFromLoader) { } @Override + public NonMatchedSmallTableIterator createNonMatchedSmallTableIterator( + MatchTracker matchTracker) { + return new NonMatchedSmallTableIteratorImpl(matchTracker); + } + + @Override public void seal() { hashMap.seal(); } @@ -541,6 +561,72 @@ public GetAdaptor() { sortableSortOrders, nullMarkers, notNullMarkers)); } + /* + * This variation is for FULL OUTER MapJoin. It does key match tracking only if the key has + * no NULLs. + */ + @Override + public JoinUtil.JoinResult setFromVectorNoNulls(VectorHashKeyWrapper kw, + VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, + MatchTracker matchTracker) + throws HiveException { + if (nulls == null) { + nulls = new boolean[keyOutputWriters.length]; + currentKey = new Object[keyOutputWriters.length]; + vectorKeyOIs = new ArrayList(); + for (int i = 0; i < keyOutputWriters.length; i++) { + vectorKeyOIs.add(keyOutputWriters[i].getObjectInspector()); + } + } else { + assert nulls.length == keyOutputWriters.length; + } + boolean hasNulls = false; + for (int i = 0; i < keyOutputWriters.length; i++) { + currentKey[i] = keyWrapperBatch.getWritableKeyValue(kw, i, keyOutputWriters[i]); + if (currentKey[i] == null) { + nulls[i] = true; + hasNulls = true; + } else { + nulls[i] = false; + } + } + if (hasNulls) { + currentValue.reset(); + return JoinUtil.JoinResult.NOMATCH; + } + return currentValue.setFromOutput( + MapJoinKey.serializeRow(output, currentKey, vectorKeyOIs, + sortableSortOrders, nullMarkers, notNullMarkers), matchTracker); + } + + /* + * This variation is used by FULL OUTER INTERSECT MapJoin. It does key match tracking for + * intersect purposes but does not return Small Table values. 
+ */ + @Override + public boolean setFromVectorNoResult(VectorHashKeyWrapper kw, + VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, + MatchTracker matchTracker) + throws HiveException { + if (nulls == null) { + nulls = new boolean[keyOutputWriters.length]; + currentKey = new Object[keyOutputWriters.length]; + vectorKeyOIs = new ArrayList(); + for (int i = 0; i < keyOutputWriters.length; i++) { + vectorKeyOIs.add(keyOutputWriters[i].getObjectInspector()); + } + } else { + assert nulls.length == keyOutputWriters.length; + } + for (int i = 0; i < keyOutputWriters.length; i++) { + currentKey[i] = keyWrapperBatch.getWritableKeyValue(kw, i, keyOutputWriters[i]); + nulls[i] = currentKey[i] == null; + } + return currentValue.setFromOutputNoResult( + MapJoinKey.serializeRow(output, currentKey, vectorKeyOIs, + sortableSortOrders, nullMarkers, notNullMarkers), matchTracker); + } + @Override public JoinUtil.JoinResult setFromRow(Object row, List fields, List ois) throws HiveException { @@ -557,6 +643,56 @@ public GetAdaptor() { sortableSortOrders, nullMarkers, notNullMarkers)); } + /* + * This variation is for FULL OUTER MapJoin. It does key match tracking only if the key has + * no NULLs. + */ + @Override + public JoinUtil.JoinResult setFromRowNoNulls(Object row, List fields, + List ois, MatchTracker matchTracker) throws HiveException { + if (nulls == null) { + nulls = new boolean[fields.size()]; + currentKey = new Object[fields.size()]; + } + boolean hasNulls = false; + for (int keyIndex = 0; keyIndex < fields.size(); ++keyIndex) { + currentKey[keyIndex] = fields.get(keyIndex).evaluate(row); + if (currentKey[keyIndex] == null) { + nulls[keyIndex] = true; + hasNulls = true; + } else { + nulls[keyIndex] = false; + } + } + if (hasNulls) { + currentValue.reset(); + return JoinUtil.JoinResult.NOMATCH; + } + return currentValue.setFromOutput( + MapJoinKey.serializeRow(output, currentKey, ois, + sortableSortOrders, nullMarkers, notNullMarkers), matchTracker); + } + + /* + * This variation is used by FULL OUTER INTERSECT MapJoin. It does key match tracking for + * intersect purposes but does not return Small Table values. + */ + @Override + public boolean setFromRowNoResult(Object row, List fields, + List ois, MatchTracker matchTracker) throws HiveException { + if (nulls == null) { + nulls = new boolean[fields.size()]; + currentKey = new Object[fields.size()]; + } + for (int keyIndex = 0; keyIndex < fields.size(); ++keyIndex) { + currentKey[keyIndex] = fields.get(keyIndex).evaluate(row); + nulls[keyIndex] = currentKey[keyIndex] == null; + } + return currentValue.setFromOutputNoResult( + MapJoinKey.serializeRow(output, currentKey, ois, + sortableSortOrders, nullMarkers, notNullMarkers), matchTracker); + } + @Override public JoinUtil.JoinResult setFromOther(ReusableGetAdaptor other) { assert other instanceof GetAdaptor; @@ -591,8 +727,24 @@ public MapJoinRowContainer getCurrentRows() { @Override public JoinUtil.JoinResult setDirect(byte[] bytes, int offset, int length, - BytesBytesMultiHashMap.Result hashMapResult) { - return currentValue.setDirect(bytes, offset, length, hashMapResult); + BytesBytesMultiHashMap.Result hashMapResult, MatchTracker matchTracker) { + return currentValue.setDirect( + bytes, offset, length, hashMapResult, matchTracker); + } + + /* + * This variation is used by FULL OUTER INTERSECT MapJoin. It does key match tracking for + * intersect purposes but does not return Small Table values. 
+ */ + @Override + public boolean setDirectNoResult(byte[] bytes, int offset, int length, + WriteBuffers.Position readPos, MatchTracker matchTracker) { + return currentValue.setDirectNoResult(bytes, offset, length, readPos, matchTracker); + } + + @Override + public MatchTracker createMatchTracker() { + return MatchTracker.create(hashMap.getNumHashBuckets()); } @Override @@ -619,6 +771,7 @@ public int directSpillPartitionId() { private final LazyBinaryStruct valueStruct; private final boolean needsComplexObjectFixup; private final ArrayList complexObjectArrayBuffer; + private final WriteBuffers.Position noResultReadPos; public ReusableRowContainer() { if (internalValueOi != null) { @@ -639,13 +792,18 @@ public ReusableRowContainer() { } uselessIndirection = new ByteArrayRef(); hashMapResult = new BytesBytesMultiHashMap.Result(); + noResultReadPos = new WriteBuffers.Position(); clearRows(); } + public BytesBytesMultiHashMap.Result getHashMapResult() { + return hashMapResult; + } + public JoinUtil.JoinResult setFromOutput(Output output) { aliasFilter = hashMap.getValueResult( - output.getData(), 0, output.getLength(), hashMapResult); + output.getData(), 0, output.getLength(), hashMapResult, /* matchTracker */ null); dummyRow = null; if (hashMapResult.hasRows()) { return JoinUtil.JoinResult.MATCH; @@ -653,8 +811,34 @@ public ReusableRowContainer() { aliasFilter = (byte) 0xff; return JoinUtil.JoinResult.NOMATCH; } + } + + public JoinUtil.JoinResult setFromOutput(Output output, MatchTracker matchTracker) { - } + aliasFilter = hashMap.getValueResult( + output.getData(), 0, output.getLength(), hashMapResult, matchTracker); + dummyRow = null; + if (hashMapResult.hasRows()) { + return JoinUtil.JoinResult.MATCH; + } else { + aliasFilter = (byte) 0xff; + return JoinUtil.JoinResult.NOMATCH; + } + } + + /* + * This variation is used by FULL OUTER INTERSECT MapJoin. It does key match tracking for + * intersect purposes but does not return Small Table values. + */ + public boolean setFromOutputNoResult(Output output, MatchTracker matchTracker) { + hashMap.lookupKeyNoResult( + output.getData(), 0, output.getLength(), noResultReadPos, matchTracker); + return true; + } + + public void reset() { + hashMapResult.forget(); + } @Override public boolean hasRows() { @@ -773,8 +957,8 @@ public void write(MapJoinObjectSerDeContext valueContext, ObjectOutputStream out // Direct access. public JoinUtil.JoinResult setDirect(byte[] bytes, int offset, int length, - BytesBytesMultiHashMap.Result hashMapResult) { - aliasFilter = hashMap.getValueResult(bytes, offset, length, hashMapResult); + BytesBytesMultiHashMap.Result hashMapResult, MatchTracker matchTracker) { + aliasFilter = hashMap.getValueResult(bytes, offset, length, hashMapResult, matchTracker); dummyRow = null; if (hashMapResult.hasRows()) { return JoinUtil.JoinResult.MATCH; @@ -783,6 +967,81 @@ public void write(MapJoinObjectSerDeContext valueContext, ObjectOutputStream out return JoinUtil.JoinResult.NOMATCH; } } + + /* + * This variation is used by FULL OUTER INTERSECT MapJoin. It does key match tracking for + * intersect purposes but does not return Small Table values. + */ + public boolean setDirectNoResult(byte[] bytes, int offset, int length, + WriteBuffers.Position readPos, MatchTracker matchTracker) { + hashMap.lookupKeyNoResult(bytes, offset, length, readPos, matchTracker); + return true; + } + } + + /** + * For FULL OUTER MapJoin: Iterates through the Small Table hash table and returns the key and + * value rows for any non-matched keys. 
+ */ + private class NonMatchedSmallTableIteratorImpl implements NonMatchedSmallTableIterator { + + private final MatchTracker matchTracker; + + private int currentIndex; + + private final WriteBuffers.ByteSegmentRef keyRef; + private final BytesWritable bytesWritable; + private final ReusableRowContainer currentValue; + + NonMatchedSmallTableIteratorImpl(MatchTracker matchTracker) { + this.matchTracker = matchTracker; + + Preconditions.checkState(keySerde != null); + + currentIndex = -1; + + keyRef = new WriteBuffers.ByteSegmentRef(); + bytesWritable = new BytesWritable(); + + currentValue = new ReusableRowContainer(); + } + + @Override + public boolean isNext() { + + // If another non-matched key is found, the key bytes will be referenced by keyRef, and + // our ReusableRowContainer's BytesBytesMultiHashMap.Result will reference the value rows. + currentIndex = + hashMap.findNextNonMatched( + currentIndex, keyRef, currentValue.getHashMapResult(), matchTracker); + return (currentIndex != -1); + } + + @Override + public List getCurrentKey() throws HiveException { + List deserializedList = + MapJoinKey.deserializeRow( + keyRef.getBytes(), + (int) keyRef.getOffset(), + keyRef.getLength(), + bytesWritable, keySerde); + return deserializedList; + } + + @Override + public ByteSegmentRef getCurrentKeyAsRef() { + return keyRef; + } + + @Override + public MapJoinRowContainer getCurrentRows() { + return currentValue; + } + + @Override + public BytesBytesMultiHashMap.Result getHashMapResult() { + return currentValue.getHashMapResult(); + } } public static boolean isSupportedKey(ObjectInspector keyOi) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java index 6504a5f..49d5e43 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java @@ -40,6 +40,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; /** @@ -171,4 +172,19 @@ public static Output serializeRow(Output byteStream, Object[] fieldData, } return byteStream; } + + /** + * Deserializes a key. Needed for FULL OUTER MapJoin to unpack the Small Table key when + * adding the non-matched key to the join output result. + * @param bytesWritable The BytesWritable to reuse. 
+ */ + public static List deserializeRow(byte[] keyBytes, int keyOffset, int keyLength, + BytesWritable bytesWritable, AbstractSerDe serde) throws HiveException { + try { + bytesWritable.set(keyBytes, keyOffset, keyLength); + return (List) serde.deserialize(bytesWritable); + } catch (SerDeException e) { + throw new HiveException("Deserialization error", e); + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectSerDeContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectSerDeContext.java index 345d1f4..1d7aec8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectSerDeContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectSerDeContext.java @@ -17,11 +17,16 @@ */ package org.apache.hadoop.hive.ql.exec.persistence; +import java.util.ArrayList; +import java.util.List; + import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; @SuppressWarnings("deprecation") public class MapJoinObjectSerDeContext { @@ -55,6 +60,18 @@ public boolean hasFilterTag() { return hasFilter; } + public String stringify() { + StandardStructObjectInspector standardStructOI = (StandardStructObjectInspector) standardOI; + List structFields = standardStructOI.getAllStructFieldRefs(); + List typeInfoStrings = new ArrayList(); + for (StructField field : structFields) { + ObjectInspector fieldOI = field.getFieldObjectInspector(); + typeInfoStrings.add(fieldOI.getTypeName()); + } + return "[types " + typeInfoStrings.toString() + ", serde=" + serde.getClass().getName() + + ", hasFilter=" + hasFilter + "]"; + } + @Override public String toString() { return "MapJoinObjectSerDeContext [standardOI=" + standardOI + ", serde=" + serde diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainer.java index b0c7574..d9bfb12 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainer.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.io.Writable; @@ -43,9 +44,24 @@ * Changes current rows to which adaptor is referring to the rows corresponding to * the key represented by a VHKW object, and writers and batch used to interpret it. */ + JoinUtil.JoinResult setFromVector(VectorHashKeyWrapper kw, VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch) throws HiveException; + /* + * This variation is for FULL OUTER MapJoin. It does key match tracking only if the key has + * no NULLs.
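A usage sketch for MapJoinKey.deserializeRow, matching getCurrentKey in the iterator implementations above; the BytesWritable is a reusable scratch wrapper, so one instance serves an entire scan:

BytesWritable reuse = new BytesWritable();
List<?> keyObjList = MapJoinKey.deserializeRow(
    keyRef.getBytes(), (int) keyRef.getOffset(), keyRef.getLength(), reuse, keySerde);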
+ */ + JoinUtil.JoinResult setFromVectorNoNulls(VectorHashKeyWrapper kw, VectorExpressionWriter[] keyOutputWriters, + VectorHashKeyWrapperBatch keyWrapperBatch, MatchTracker matchTracker) throws HiveException; + + /* + * This variation is used by FULL OUTER INTERSECT MapJoin. It does key match tracking for + * intersect purposes but does not return Small Table values. + */ + boolean setFromVectorNoResult(VectorHashKeyWrapper kw, VectorExpressionWriter[] keyOutputWriters, + VectorHashKeyWrapperBatch keyWrapperBatch, MatchTracker matchTracker) throws HiveException; + /** * Changes current rows to which adaptor is referring to the rows corresponding to * the key represented by a row object, and fields and ois used to interpret it. @@ -53,6 +69,22 @@ JoinUtil.JoinResult setFromRow(Object row, List fields, List ois) throws HiveException; + /* + * This variation is for FULL OUTER MapJoin. It does key match tracking only if the key has + * no NULLs. + */ + JoinUtil.JoinResult setFromRowNoNulls(Object row, List fields, + List ois, MatchTracker matchTracker) + throws HiveException; + + /* + * This variation is used by FULL OUTER INTERSECT MapJoin. It does key match tracking for + * intersect purposes but does not return Small Table values. + */ + boolean setFromRowNoResult(Object row, List fields, + List ois, MatchTracker matchTracker) + throws HiveException; + /** * Changes current rows to which adaptor is referring to the rows corresponding to * the key that another adaptor has already deserialized via setFromVector/setFromRow. @@ -82,6 +114,42 @@ MapJoinKey putRow(Writable currentKey, Writable currentValue) throws SerDeException, HiveException, IOException; /** + * For FULL OUTER MapJoin: Iterates through the Small Table hash table and returns the key and + * value rows for any non-matched keys. + */ + public interface NonMatchedSmallTableIterator { + + /** + * Return true if another non-matched key was found. + */ + boolean isNext(); + + /** + * @return The current key as a deserialized object array after a successful isNext() call + * that returns true. + * @throws HiveException + */ + List getCurrentKey() throws HiveException; + + /** + * @return The current key as a WriteBuffers.ByteSegmentRef after a successful isNext() call + * that returns true. + */ + ByteSegmentRef getCurrentKeyAsRef(); + + /** + * @return The container with the value rows for the current key after a successful isNext() call + * that returns true. + */ + MapJoinRowContainer getCurrentRows(); + + /** + * @return The value rows as a BytesBytesMultiHashMap.Result. + */ + BytesBytesMultiHashMap.Result getHashMapResult(); + } + + /** * Indicates to the container that the puts have ended; table is now r/o. */ void seal(); @@ -94,6 +162,12 @@ MapJoinKey putRow(Writable currentKey, Writable currentValue) */ ReusableGetAdaptor createGetter(MapJoinKey keyTypeFromLoader); + /** + * Creates an iterator for going through the hash table, returning the key and value rows for any + * non-matched keys. + */ + NonMatchedSmallTableIterator createNonMatchedSmallTableIterator(MatchTracker matchTracker); + /** Clears the contents of the table. */ void clear(); @@ -108,7 +182,7 @@ MapJoinKey putRow(Writable currentKey, Writable currentValue) boolean hasSpill(); /** - * Return the size of the hash table + * Return the size of the hash table. 
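Taken together, the interface is consumed as a straight pump loop; generateFullOuterSmallTableNoMatches earlier in this patch is the canonical caller. Condensed:

NonMatchedSmallTableIterator it = smallTable.createNonMatchedSmallTableIterator(matchTracker);
while (it.isNext()) {
  List<?> keyObjList = it.getCurrentKey();          // deserialized key columns
  MapJoinRowContainer values = it.getCurrentRows(); // all value rows for that key
  // emit one join-output row per value row, with the Big Table columns left NULL
}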
*/ int size(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MatchTracker.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MatchTracker.java new file mode 100644 index 0000000..59904c9 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MatchTracker.java @@ -0,0 +1,141 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.persistence; + +import com.google.common.base.Preconditions; + +/** + * Record which hash table slot entries had key matches for FULL OUTER MapJoin. + * Supports partitioned match trackers for HybridHashTableContainer. + */ +public final class MatchTracker { + + /* + * Regular case: + * isPartitioned = false + * longMatchFlags holds one bit per hash table slot entry. + * If this tracker is underneath a partitioned tracker, then partitionParent is set. + * + * Partitioned case: + * isPartitioned = true + * The partitions array has a tracker for the currently active partitions. + */ + private final boolean isPartitioned; + private final MatchTracker partitionParent; + private final long[] longMatchFlags; + private final MatchTracker[] partitions; + + private MatchTracker(boolean isPartitioned, MatchTracker partitionParent, int count) { + this.isPartitioned = isPartitioned; + this.partitionParent = partitionParent; + if (!isPartitioned) { + final int longMatchFlagsSize = (count + Long.SIZE - 1) / Long.SIZE; + longMatchFlags = new long[longMatchFlagsSize]; + partitions = null; + } else { + longMatchFlags = null; + partitions = new MatchTracker[count]; + } + } + + /* + * Create a regular tracker. + */ + public static MatchTracker create(int logicalHashBucketCount) { + return new MatchTracker(false, null, logicalHashBucketCount); + } + + /* + * Create a partitioned tracker. Use addPartition and clearPartition to maintain the currently + * active partition trackers. + */ + public static MatchTracker createPartitioned(int partitionCount) { + return new MatchTracker(true, null, partitionCount); + } + + public boolean getIsPartitioned() { + return isPartitioned; + } + + public void addPartition(int partitionId, int logicalHashBucketCount) { + partitions[partitionId] = new MatchTracker(false, this, logicalHashBucketCount); + } + + public void clearPartition(int partitionId) { + partitions[partitionId] = null; + } + + public MatchTracker getPartition(int partitionId) { + return partitions[partitionId]; + } + + private boolean isFirstMatch; + + public boolean getIsFirstMatch() { + return isFirstMatch; + } + + /* + * Track a regular hash table slot match. + * If this tracker is underneath a partitioned tracker, the partitioned tracker's first-match + * flag will be updated. 
+ */ + public void trackMatch(int logicalSlotNum) { + + Preconditions.checkState(!isPartitioned); + + final int longWordIndex = logicalSlotNum / Long.SIZE; + final long longBitMask = 1L << (logicalSlotNum % Long.SIZE); + if ((longMatchFlags[longWordIndex] & longBitMask) != 0) { + + // Flag is already on. + isFirstMatch = false; + } else { + longMatchFlags[longWordIndex] |= longBitMask; + isFirstMatch = true; + } + if (partitionParent != null) { + + // Push match flag up. + partitionParent.isFirstMatch = isFirstMatch; + } + } + + /* + * Track a partitioned hash table slot match. + */ + public void trackPartitionMatch(int partitionId, int logicalSlotNum) { + partitions[partitionId].trackMatch(logicalSlotNum); + } + + /* + * Was a regular hash table slot matched? + */ + public boolean wasMatched(int logicalSlotNum) { + final int longWordIndex = logicalSlotNum / Long.SIZE; + final long longBitMask = 1L << (logicalSlotNum % Long.SIZE); + return (longMatchFlags[longWordIndex] & longBitMask) != 0; + } + + /* + * Was a partitioned hash table slot matched? + */ + public boolean wasPartitionMatched(int partitionId, int logicalSlotNum) { + return partitions[partitionId].wasMatched(logicalSlotNum); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ReusableGetAdaptorDirectAccess.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ReusableGetAdaptorDirectAccess.java index 3303cc4..54d66a2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ReusableGetAdaptorDirectAccess.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ReusableGetAdaptorDirectAccess.java @@ -20,11 +20,21 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; +import org.apache.hadoop.hive.serde2.WriteBuffers; public interface ReusableGetAdaptorDirectAccess { JoinResult setDirect(byte[] bytes, int offset, int length, - BytesBytesMultiHashMap.Result hashMapResult); + BytesBytesMultiHashMap.Result hashMapResult, MatchTracker matchTracker); int directSpillPartitionId(); + + /* + * This variation is used by FULL OUTER INTERSECT MapJoin. It does key match tracking for + * intersect purposes but does not return Small Table values. + */ + boolean setDirectNoResult(byte[] bytes, int offset, int length, + WriteBuffers.Position readPos, MatchTracker matchTracker); + + MatchTracker createMatchTracker(); } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/UnwrapRowContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/UnwrapRowContainer.java index 95400c8..0ff54ff 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/UnwrapRowContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/UnwrapRowContainer.java @@ -26,6 +26,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; /** @@ -70,6 +71,8 @@ public MapJoinRowContainer setInternal(MapJoinRowContainer internal, Object[] cu return unwrap(iterator.next()); } + private static final ShortWritable ALL_ALIAS_FILTER_SHORT_WRITABLE = new ShortWritable((byte) 0xff); + private List unwrap(List values) { if (values == null) { return null; @@ -90,7 +93,14 @@ public MapJoinRowContainer setInternal(MapJoinRowContainer internal, Object[] cu } } if (tagged) { - unwrapped.add(values.get(values.size() - 1)); // append filter tag + + // Append filter tag. 
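+ // The values list can now be empty (e.g. for FULL OUTER MapJoin non-match Small Table + // results), so guard the values.get(size - 1) access and fall back to the constant + // all-alias filter tag.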
+ final int size = values.size(); + if (size == 0) { + unwrapped.add(ALL_ALIAS_FILTER_SHORT_WRITABLE); + } else { + unwrapped.add(values.get(size - 1)); + } } return unwrapped; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java index 2cccb44..152dc98 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java @@ -262,7 +262,7 @@ public DynamicValueRegistryTez call() { e.getMessage()); throw (InterruptedException) e; } else { - throw new RuntimeException("Reduce operator initialization failed", e); + throw new RuntimeException(redWork.getName() + " operator initialization failed", e); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java index c4503ad..f2400b8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java @@ -136,7 +136,7 @@ public void process(Object data, int tag) throws HiveException { throw new HiveException(e); } - forward(data, rowInspector, true); + forward(data, rowInspector); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java index e96619c..9615869 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java @@ -20,6 +20,7 @@ import java.sql.Date; import java.sql.Timestamp; +import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -985,6 +986,17 @@ public void assignRow(VectorizedRowBatch batch, int batchIndex, Object[] objects } } + public void assignRow(VectorizedRowBatch batch, int batchIndex, ArrayList objectList) { + final int count = isConvert.length; + for (int i = 0; i < count; i++) { + if (isConvert[i]) { + assignConvertRowColumn(batch, batchIndex, i, objectList.get(i)); + } else { + assignRowColumn(batch, batchIndex, i, objectList.get(i)); + } + } + } + /* * Assign a row from a list of standard objects up to a count */ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java index bedc12a..0cf8491 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java @@ -259,14 +259,27 @@ void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBa private CopyRow[] subRowToBatchCopiersByReference; public void init(VectorColumnMapping columnMapping) throws HiveException { - int count = columnMapping.getCount(); + init( + columnMapping.getInputColumns(), + columnMapping.getOutputColumns(), + columnMapping.getTypeInfos()); + } + + public void init(int[] columnMap, TypeInfo[] typeInfos) throws HiveException { + init(columnMap, columnMap, typeInfos); + } + + public void init(int[] inputColumnMap, int[] outputColumnMap, TypeInfo[] typeInfos) + throws HiveException { + + final int count = inputColumnMap.length; subRowToBatchCopiersByValue = new CopyRow[count]; subRowToBatchCopiersByReference = new CopyRow[count]; for (int i = 0; i < count; i++) { - int inputColumn = 
columnMapping.getInputColumns()[i]; - int outputColumn = columnMapping.getOutputColumns()[i]; - TypeInfo typeInfo = columnMapping.getTypeInfos()[i]; + int inputColumn = inputColumnMap[i]; + int outputColumn = outputColumnMap[i]; + TypeInfo typeInfo = typeInfos[i]; Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); CopyRow copyRowByValue = null; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java index 8ea625e..c9927d3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java @@ -444,6 +444,38 @@ public void init(boolean[] columnsToIncludeTruncated) throws HiveException { } + public void init(int[] outputColumns, boolean[] columnsToInclude) throws HiveException { + + Preconditions.checkState( + outputColumns.length == columnsToInclude.length); + + final int columnCount = sourceTypeInfos.length; + allocateArrays(columnCount); + + int includedCount = 0; + final int[] includedIndices = new int[columnCount]; + + for (int i = 0; i < columnCount; i++) { + + if (!columnsToInclude[i]) { + + // Field not included in query. + + } else { + + initTopLevelField(i, outputColumns[i], sourceTypeInfos[i], dataTypePhysicalVariations[i]); + includedIndices[includedCount++] = i; + } + } + + // Optimizing for readField? + if (includedCount < columnCount && deserializeRead.isReadFieldSupported()) { + useReadField = true; + readFieldLogicalIndices = Arrays.copyOf(includedIndices, includedCount); + } + + } + /** * Initialize for converting the source data type that are going to be read with the * DeserializedRead interface passed to the constructor to the target data types desired in diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java index 14ac8ee..73965ad 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java @@ -133,7 +133,7 @@ public void process(Object row, int tag) throws HiveException { // All are selected, do nothing } if (vrg.size > 0) { - forward(vrg, null, true); + vectorForward(vrg); } // Restore the original selected vector diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index 75efc29..20af960 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -1170,7 +1170,7 @@ private void writeGroupRow(VectorAggregationBufferRow agg, DataOutputBuffer buff } private void flushOutput() throws HiveException { - forward(outputBatch, null, true); + vectorForward(outputBatch); outputBatch.reset(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java index 051d338..7edb059 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java @@ -88,7 +88,7 @@ public void process(Object row, int tag) throws HiveException { batch.selected[i] = batch.selected[skipSize + i]; } } - forward(row, inputObjInspectors[tag], 
true); + vectorForward(batch); currCount += batch.size; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java index 497b12d..41a150d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; @@ -25,6 +26,7 @@ import java.util.Map; import java.util.concurrent.Future; +import org.apache.commons.lang.ArrayUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -34,6 +36,7 @@ import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.ObjectContainer; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -110,6 +113,235 @@ public VectorizationContext getInputVectorizationContext() { return vContext; } + /** + * RESTRICTION: + * No MapJoin key or value expressions other than columns (i.e. ExprNodeColumnDesc). Big Table + * key and value columns can be easily determined. + * + * Big Table input maps: + * // Takes some row input and tells us which are key columns and which are value columns. + * Which input columns are the key columns. + * Which input columns are the value columns. + * // E.g. [0, 2, 10] is ctinyint (type: tinyint), cint (type: int), cboolean1 (type: boolean) + * // Input column names are _col0, _col1, _col2 by SELECT + * // so 0, 2 are the keys + * // where 1 is the value + * + * Big Table retain are input Big Table column numbers kept in the output (in output order). + * + * Big Table output mapping: + * + * // When Big Table output result starts at 0, then: + * // keys are [0, 2] + * // value is [1] + * // Needed to map Map Join output result keys and values to key and value expressions + * // that represent the Big Table input row so Auxiliary RS can be created. + * // If other order, then would start at smallTableResultSize offset. + * + * How to rename Auxiliary RS output (which is Big Table input) to _colN form? + * KEY.reducesinkkey 0 .. 
K - 1 are _outN where N is key map [keyNum] + * VALUE._outN are _outN where N is value map [valueNum] + */ + public static class FullOuterMapJoinBigTableInfo { + + private final int[] inputKeyColumnMap; + private final String[] inputKeyColumnNames; + private final int[] inputValueColumnMap; + private final String[] inputValueColumnNames; + + private final int[] outputKeyColumnMap; + private final int[] outputValueColumnMap; + + private boolean isBigTableFirst; + private final int bigTableResultSize; + private final int smallTableResultSize; + + public FullOuterMapJoinBigTableInfo( + int[] inputKeyColumnMap, + String[] inputKeyColumnNames, + int[] inputValueColumnMap, + String[] inputValueColumnNames, + int[] outputKeyColumnMap, + int[] outputValueColumnMap, + boolean isBigTableFirst, + int bigTableResultSize, + int smallTableResultSize) { + this.inputKeyColumnMap = inputKeyColumnMap; + this.inputKeyColumnNames = inputKeyColumnNames; + this.inputValueColumnMap = inputValueColumnMap; + this.inputValueColumnNames = inputValueColumnNames; + + this.outputKeyColumnMap = outputKeyColumnMap; + this.outputValueColumnMap = outputValueColumnMap; + + this.isBigTableFirst = isBigTableFirst; + this.bigTableResultSize = bigTableResultSize; + this.smallTableResultSize = smallTableResultSize; + } + + public int[] getInputKeyColumnMap() { + return inputKeyColumnMap; + } + public String[] getInputKeyColumnNames() { + return inputKeyColumnNames; + } + public int[] getInputValueColumnMap() { + return inputValueColumnMap; + } + public String[] getInputValueColumnNames() { + return inputValueColumnNames; + } + + public int[] getOutputKeyColumnNums() { + return outputKeyColumnMap; + } + public int[] getOutputValueColumnNums() { + return outputValueColumnMap; + } + + public boolean getIsBigTableFirst() { + return isBigTableFirst; + } + public int getBigTableResultSize() { + return bigTableResultSize; + } + public int getSmallTableResultSize() { + return smallTableResultSize; + } + } + + public static FullOuterMapJoinBigTableInfo getFullOuterMapJoinBigTableInfo(MapJoinDesc desc) { + + final byte posBigTable = (byte) desc.getPosBigTable(); + + List keyExprs = desc.getKeys().get(posBigTable); + final int keySize = keyExprs.size(); + List bigTableExprs = desc.getExprs().get(posBigTable); + + Byte[] order = desc.getTagOrder(); + Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]); + + final int outputColumnCount = desc.getOutputColumnNames().size(); + TypeInfo[] outputTypeInfos = new TypeInfo[outputColumnCount]; + + /* + * Gather up big and small table output result information from the MapJoinDesc. + */ + List bigTableRetainList = desc.getRetainList().get(posBigTable); + final int bigTableRetainSize = bigTableRetainList.size(); + + int[] smallTableIndices; + int smallTableIndicesSize; + List smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable); + if (desc.getValueIndices() != null && + desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) { + smallTableIndices = desc.getValueIndices().get(posSingleVectorMapJoinSmallTable); + smallTableIndicesSize = smallTableIndices.length; + } else { + smallTableIndices = null; + smallTableIndicesSize = 0; + } + + List smallTableRetainList = desc.getRetainList().get(posSingleVectorMapJoinSmallTable); + final int smallTableRetainSize = + (smallTableRetainList != null ? 
smallTableRetainList.size() : 0); + + int smallTableResultSize = 0; + if (smallTableIndicesSize > 0) { + smallTableResultSize = smallTableIndicesSize; + } else if (smallTableRetainSize > 0) { + smallTableResultSize = smallTableRetainSize; + } + + /* + * Determine the big table retained mapping first so we can optimize out (with + * projection) copying inner join big table keys in the subsequent small table results section. + */ + + List inputKeyColumnNumList = new ArrayList(); + List inputKeyColumnNameList = new ArrayList(); + List inputValueColumnNumList = new ArrayList(); + List inputValueColumnNameList = new ArrayList(); + + int nextOutputColumn = (order[0] == posBigTable ? 0 : smallTableResultSize); + Map columnMap = new HashMap(); + for (int i = 0; i < bigTableRetainSize; i++) { + + ExprNodeDesc exprNodeDesc = bigTableExprs.get(i); + if (!(exprNodeDesc instanceof ExprNodeColumnDesc)) { + return null; + } + ExprNodeColumnDesc bigTableColumnExpr = (ExprNodeColumnDesc) exprNodeDesc; + TypeInfo typeInfo = bigTableColumnExpr.getTypeInfo(); + + outputTypeInfos[nextOutputColumn] = typeInfo; + + columnMap.put(bigTableColumnExpr.getColumn(), i); + nextOutputColumn++; + } + + for (int i = 0; i < keySize; i++) { + ExprNodeDesc keyExpr = keyExprs.get(i); + if (!(keyExpr instanceof ExprNodeColumnDesc)) { + return null; + } + ExprNodeColumnDesc keyColumnExpr = (ExprNodeColumnDesc) keyExpr; + String columnName = keyColumnExpr.getColumn(); + Integer columnNum = columnMap.get(columnName); + if (columnNum == null) { + + // Not all the keys are retained. + return null; + } + inputKeyColumnNumList.add(columnNum); + inputKeyColumnNameList.add(columnName); + } + + for (int i = 0; i < bigTableRetainSize; i++) { + if (inputKeyColumnNumList.contains(i)) { + continue; + } + inputValueColumnNumList.add(i); + ExprNodeColumnDesc bigTableExpr = (ExprNodeColumnDesc) bigTableExprs.get(i); + inputValueColumnNameList.add(bigTableExpr.getColumn()); + } + + // UNDONE: + List outputKeyColumnNumList = new ArrayList(); + List outputValueColumnNumList = new ArrayList(); + + // UNDONE + outputKeyColumnNumList.addAll(inputKeyColumnNumList); + outputValueColumnNumList.addAll(inputValueColumnNumList); + + int[] inputKeyColumnNums = + ArrayUtils.toPrimitive(inputKeyColumnNumList.toArray(new Integer[0])); + String[] inputKeyColumnNames = + inputKeyColumnNameList.toArray(new String[0]); + int[] inputValueColumnNums = + ArrayUtils.toPrimitive(inputValueColumnNumList.toArray(new Integer[0])); + String[] inputValueColumnNames = + inputValueColumnNameList.toArray(new String[0]); + + int[] outputKeyColumnNums = + ArrayUtils.toPrimitive(outputKeyColumnNumList.toArray(new Integer[0])); + int[] outputValueColumnNums = + ArrayUtils.toPrimitive(outputValueColumnNumList.toArray(new Integer[0])); + + boolean isBigTableFirst = (order[0] == posBigTable); + + return new FullOuterMapJoinBigTableInfo( + inputKeyColumnNums, + inputKeyColumnNames, + inputValueColumnNums, + inputValueColumnNames, + outputKeyColumnNums, + outputValueColumnNums, + isBigTableFirst, + bigTableRetainSize, + smallTableResultSize); + } + public static TypeInfo[] getOutputTypeInfos(MapJoinDesc desc) { final byte posBigTable = (byte) desc.getPosBigTable(); @@ -132,7 +364,8 @@ public VectorizationContext getInputVectorizationContext() { int[] smallTableIndices; int smallTableIndicesSize; List smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable); - if (desc.getValueIndices() != null && desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) { + 
if (desc.getValueIndices() != null && + desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) { smallTableIndices = desc.getValueIndices().get(posSingleVectorMapJoinSmallTable); smallTableIndicesSize = smallTableIndices.length; } else { @@ -141,7 +374,8 @@ public VectorizationContext getInputVectorizationContext() { } List smallTableRetainList = desc.getRetainList().get(posSingleVectorMapJoinSmallTable); - final int smallTableRetainSize = smallTableRetainList.size(); + final int smallTableRetainSize = + (smallTableRetainList != null ? smallTableRetainList.size() : 0); int smallTableResultSize = 0; if (smallTableIndicesSize > 0) { @@ -216,6 +450,7 @@ public VectorizationContext getInputVectorizationContext() { return outputTypeInfos; } + @Override public void initializeOp(Configuration hconf) throws HiveException { super.initializeOp(hconf); @@ -234,7 +469,6 @@ public void initializeOp(Configuration hconf) throws HiveException { */ @Override protected void internalForward(Object row, ObjectInspector outputOI) throws HiveException { - Object[] values = (Object[]) row; VectorAssignRow va = outputVectorAssignRowMap.get(outputOI); if (va == null) { va = new VectorAssignRow(); @@ -242,7 +476,11 @@ protected void internalForward(Object row, ObjectInspector outputOI) throws Hive outputVectorAssignRowMap.put(outputOI, va); } - va.assignRow(outputBatch, outputBatch.size, values); + if (row instanceof ArrayList) { + va.assignRow(outputBatch, outputBatch.size, (ArrayList) row); + } else { + va.assignRow(outputBatch, outputBatch.size, (Object[]) row); + } ++outputBatch.size; if (outputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) { @@ -251,7 +489,7 @@ protected void internalForward(Object row, ObjectInspector outputOI) throws Hive } private void flushOutput() throws HiveException { - forward(outputBatch, null, true); + vectorForward(outputBatch); outputBatch.reset(); } @@ -263,8 +501,10 @@ public void closeOp(boolean aborted) throws HiveException { tableContainer.dumpMetrics(); } } - if (!aborted && 0 < outputBatch.size) { - flushOutput(); + if (!aborted) { + if (outputBatch.size > 0) { + flushOutput(); + } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java index a84bd72..3980ba7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java @@ -19,8 +19,13 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.TreeSet; + +import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.CompilationOpContext; @@ -28,6 +33,8 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinBaseOperator.FullOuterMapJoinBigTableInfo; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory; @@ -76,6 +83,12 @@ private 
VectorExpressionWriter[] rowWriters; // Writer for producing row from input batch protected transient Object[] singleRow; + private transient VectorCopyRow auxiliaryVectorCopy; + + private transient VectorizedRowBatch auxiliaryOutputBatch; + + private transient int[] auxiliaryNullColumnNums; + /** Kryo ctor. */ @VisibleForTesting public VectorMapJoinOperator() { @@ -181,11 +194,6 @@ protected Object _evaluate(Object row, int version) throws HiveException { } // Now replace the old evaluators with our own joinValues[posBigTable] = vectorNodeEvaluators; - - // Filtering is handled in the input batch processing - if (filterMaps != null) { - filterMaps[posBigTable] = null; - } } @Override @@ -194,6 +202,90 @@ protected Object _evaluate(Object row, int version) throws HiveException { return dest.setFromVector(keyValues[batchIndex], keyOutputWriters, keyWrapperBatch); } + /* + * This variation is for FULL OUTER MapJoin. It does key match tracking only if the key has + * no NULLs. + */ + @Override + protected JoinUtil.JoinResult setMapJoinKeyNoNulls(ReusableGetAdaptor dest, Object row, byte alias, + MatchTracker matchTracker) + throws HiveException { + return dest.setFromVectorNoNulls(keyValues[batchIndex], keyOutputWriters, keyWrapperBatch, + matchTracker); + } + + /* + * This variation is used by FULL OUTER INTERSECT MapJoin. It does key match tracking for + * intersect purposes but does not return Small Table values. + */ + @Override + protected boolean setMapJoinKeyNoResult( + ReusableGetAdaptor dest, Object row, byte alias, MatchTracker matchTracker) + throws HiveException { + return dest.setFromVectorNoResult( + keyValues[batchIndex], keyOutputWriters, keyWrapperBatch, matchTracker); + } + + private int[] getNonBigKeyNullColumnNums(FullOuterMapJoinBigTableInfo fullOuterMapJoinBigTableInfo) { + Set auxiliaryNullColumnSet = new TreeSet(); + for (Integer i : fullOuterMapJoinBigTableInfo.getInputValueColumnMap()) { + auxiliaryNullColumnSet.add(i); + } + int smallTableColumnNum = + (fullOuterMapJoinBigTableInfo.getIsBigTableFirst() ? 
+ fullOuterMapJoinBigTableInfo.getBigTableResultSize() : 0); + for (int i = 0; i < fullOuterMapJoinBigTableInfo.getSmallTableResultSize(); i++) { + auxiliaryNullColumnSet.add(smallTableColumnNum + i); + } + ArrayList auxiliaryNullColumnList = new ArrayList(); + auxiliaryNullColumnList.addAll(auxiliaryNullColumnSet); + return ArrayUtils.toPrimitive(auxiliaryNullColumnList.toArray(new Integer[0])); + } + + @Override + protected void forwardFirstTimeMatchToFullOuterIntersect( + Object firstTimeMatchRow, ObjectInspector outputOI) + throws HiveException { + + if (auxiliaryVectorCopy == null) { + + FullOuterMapJoinBigTableInfo fullOuterMapJoinBigTableInfo = + getFullOuterMapJoinBigTableInfo(conf); + int[] inputKeyColumnMap = fullOuterMapJoinBigTableInfo.getInputKeyColumnMap(); + + auxiliaryVectorCopy = new VectorCopyRow(); + auxiliaryVectorCopy.init( + inputKeyColumnMap, + fullOuterMapJoinBigTableInfo.getOutputKeyColumnNums(), + Arrays.copyOf(vOutContext.getInitialTypeInfos(), inputKeyColumnMap.length)); + + auxiliaryOutputBatch = VectorizedBatchUtil.makeLike(outputBatch); + + auxiliaryNullColumnNums = + getNonBigKeyNullColumnNums(fullOuterMapJoinBigTableInfo); + } + + VectorizedRowBatch inBatch = (VectorizedRowBatch) firstTimeMatchRow; + auxiliaryVectorCopy.copyByValue( + inBatch, batchIndex, + auxiliaryOutputBatch, auxiliaryOutputBatch.size); + for (int columnNum : auxiliaryNullColumnNums) { + ColumnVector colVector = auxiliaryOutputBatch.cols[columnNum]; + colVector.isNull[auxiliaryOutputBatch.size] = true; + colVector.noNulls = false; + } + + ++auxiliaryOutputBatch.size; + if (auxiliaryOutputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) { + flushAuxiliaryOutput(); + } + } + + private void flushAuxiliaryOutput() throws HiveException { + vectorForwardAuxiliary(auxiliaryOutputBatch); + auxiliaryOutputBatch.reset(); + } + @Override public void process(Object row, int tag) throws HiveException { @@ -240,6 +332,16 @@ public void process(Object row, int tag) throws HiveException { } @Override + public void closeOp(boolean aborted) throws HiveException { + super.closeOp(aborted); + if (!aborted) { + if (auxiliaryOutputBatch != null && auxiliaryOutputBatch.size > 0) { + flushAuxiliaryOutput(); + } + } + } + + @Override protected void spillBigTableRow(MapJoinTableContainer hybridHtContainer, Object row) throws HiveException { // Extract the actual row from row batch diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java index 35f810f..a88d2c6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java @@ -324,7 +324,7 @@ protected void internalForward(Object row, ObjectInspector outputOI) throws Hive } private void flushOutput() throws HiveException { - forward(outputBatch, null, true); + vectorForward(outputBatch); outputBatch.reset(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java index 22d2f34..2f296c9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java @@ -136,7 +136,7 @@ public void process(Object row, int tag) throws HiveException { // Just forward the row as is if (conf.isSelStarNoCompute()) { - forward(row, inputObjInspectors[tag], true); + 
vectorForward((VectorizedRowBatch) row); return; } @@ -155,7 +155,7 @@ public void process(Object row, int tag) throws HiveException { int originalProjectionSize = vrg.projectionSize; vrg.projectionSize = projectedOutputColumns.length; vrg.projectedColumns = this.projectedOutputColumns; - forward(vrg, outputObjInspector, true); + vectorForward((VectorizedRowBatch) row); // Revert the projected columns back, because vrg will be re-used. vrg.projectionSize = originalProjectionSize; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 93212ce..7086317 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -385,9 +385,6 @@ public DataTypePhysicalVariation getDataTypePhysicalVariation(int columnNum) thr if (initialDataTypePhysicalVariations == null) { return null; } - if (columnNum < 0) { - fake++; - } if (columnNum < initialDataTypePhysicalVariations.size()) { return initialDataTypePhysicalVariations.get(columnNum); } @@ -1648,8 +1645,6 @@ private VectorExpression getDecimal64VectorExpressionForUdf(GenericUDF genericUd return vectorExpression; } - static int fake = 0; - private VectorExpression getVectorExpressionForUdf(GenericUDF genericUdf, Class udfClass, List childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java index 4407961..2ae2609 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java @@ -285,21 +285,10 @@ protected String getDoubleValueParamString(int typeNum, double value) { } protected String getParamTypeString(int typeNum) { - if (inputTypeInfos == null || inputDataTypePhysicalVariations == null) { - fake++; - } - if (typeNum >= inputTypeInfos.length || typeNum >= inputDataTypePhysicalVariations.length) { - fake++; - } return getTypeName(inputTypeInfos[typeNum], inputDataTypePhysicalVariations[typeNum]); } - static int fake; - public static String getTypeName(TypeInfo typeInfo, DataTypePhysicalVariation dataTypePhysicalVariation) { - if (typeInfo == null) { - fake++; - } if (dataTypePhysicalVariation != null && dataTypePhysicalVariation != DataTypePhysicalVariation.NONE) { return typeInfo.toString() + "/" + dataTypePhysicalVariation; } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java index c832cdb..d8c9f1c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java @@ -21,7 +21,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.Map; +import java.util.Map.Entry; import org.apache.commons.lang.ArrayUtils; import org.slf4j.Logger; @@ -41,7 +41,6 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; import org.apache.hadoop.hive.ql.exec.vector.VectorCopyRow; import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; -import 
org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion; import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; @@ -55,14 +54,17 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastHashTableLoader; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.BaseWork; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.VectorDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo; import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; @@ -124,6 +126,10 @@ protected void initLoggingPrefix(String className) { // a mixture of input big table columns and new scratch columns. protected VectorizationContext vOutContext; + protected VectorMapJoinVariation vectorMapJoinVariation; + protected HashTableKind hashTableKind; + protected HashTableKeyType hashTableKeyType; + // The output column projection of the vectorized row batch. And, the type infos of the output // columns. protected int[] outputProjection; @@ -149,28 +155,70 @@ protected void initLoggingPrefix(String className) { protected String[] bigTableValueColumnNames; protected TypeInfo[] bigTableValueTypeInfos; - // This is a mapping of which big table columns (input and key/value expressions) will be - // part of the big table portion of the join output result. - protected VectorColumnOutputMapping bigTableRetainedMapping; + /* + * NOTE: + * The Big Table key columns are from the key expressions. + * The Big Table value columns are from the getExpr(posBigTable) expressions. + * Any calculations needed for those will be scratch columns. + * + * The Small Table key and value output columns are scratch columns. + * + * Big Table Retain Column Map / TypeInfos: + * Any Big Table Batch columns that will be in the output result. + * 0, 1, or more Column Nums and TypeInfos + * + * Non Outer Small Table Key Mapping: + * For non-[FULL] OUTER MapJoin, when Big Table key columns are not retained for the output + * result but are needed for the Small Table output result, they are put in this mapping + * as they are required for copying rows to the overflow batch. + * + * Outer Small Table Key Mapping + * For [FULL] OUTER MapJoin, the mapping for any Small Table key columns needed for the + * output result from the Big Table key columns. The Big Table keys cannot be projected since + * on NOMATCH there must be a physical column present to hold the non-match NULL. 
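+ * + * (Hypothetical example: if the Big Table key is in batch column 2 and the corresponding + * Small Table key appears in output column 7, this mapping copies column 2 to column 7 on + * a match; on NOMATCH, column 7 is a physical ColumnVector whose entry can be set to NULL.)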
+ * + * Full Outer Small Table Key Mapping + * For FULL OUTER MapJoin, the mapping from any needed Small Table key columns to their area + * in the output result. + * + * For deserializing a FULL OUTER non-match Small Table key into the output result. + * Can be partial or empty if some or all Small Table key columns are not retained. + * + * Small Table Value Mapping + * The mapping from Small Table value columns to their area in the output result. + * + * For deserializing Small Table value into the output result. + * + * It is the Small Table value index to output column numbers and TypeInfos. + * That is, a mapping of the LazyBinary field order to output batch scratch columns for the + * small table portion. + * Or, to use the output column nums for OUTER Small Table value NULLs. + * + */ + protected int[] bigTableRetainColumnMap; + protected TypeInfo[] bigTableRetainTypeInfos; + + protected int[] nonOuterSmallTableKeyColumnMap; + protected TypeInfo[] nonOuterSmallTableKeyTypeInfos; - // This is a mapping of which keys will be copied from the big table (input and key expressions) - // to the small table result portion of the output for outer join. - protected VectorColumnOutputMapping bigTableOuterKeyMapping; + protected VectorColumnOutputMapping outerSmallTableKeyMapping; - // This is a mapping of the values in the small table hash table that will be copied to the - // small table result portion of the output. That is, a mapping of the LazyBinary field order - // to output batch scratch columns for the small table portion. - protected VectorColumnSourceMapping smallTableMapping; + protected VectorColumnSourceMapping fullOuterSmallTableKeyMapping; + protected VectorColumnSourceMapping smallTableValueMapping; + + // The MapJoin output result projection for both the Big Table input batch and the overflow batch. protected VectorColumnSourceMapping projectionMapping; // These are the output columns for the small table and the outer small table keys. - protected int[] smallTableOutputVectorColumns; - protected int[] bigTableOuterKeyOutputVectorColumns; + protected int[] outerSmallTableKeyColumnMap; + protected int[] smallTableValueColumnMap; // These are the columns in the big and small table that are ByteColumnVector columns. // We create data buffers for these columns so we can copy strings into those columns by value. protected int[] bigTableByteColumnVectorColumns; + protected int[] nonOuterSmallTableKeyByteColumnVectorColumns; + protected int[] outerSmallTableKeyByteColumnVectorColumns; protected int[] smallTableByteColumnVectorColumns; // The above members are initialized by the constructor and must not be @@ -186,13 +234,22 @@ protected void initLoggingPrefix(String className) { // portion of the join output. protected transient VectorCopyRow bigTableRetainedVectorCopy; + // This helper object deserializes BinarySortable format small table keys into columns of a row + // in a vectorized row batch. + protected int[] allSmallTableKeyColumnNums; + protected boolean[] allSmallTableKeyColumnIncluded; + protected transient VectorDeserializeRow smallTableKeyOuterVectorDeserializeRow; + + protected transient VectorCopyRow nonOuterSmallTableKeyVectorCopy; + + // UNDONE // A helper object that efficiently copies the big table key columns (input or key expressions) - // that appear in the small table portion of the join output for outer joins. - protected transient VectorCopyRow bigTableVectorCopyOuterKeys; + // that appear in the small table portion of the join output. 
+ protected transient VectorCopyRow outerSmallTableKeyVectorCopy; // This helper object deserializes LazyBinary format small table values into columns of a row // in a vectorized row batch. - protected transient VectorDeserializeRow smallTableVectorDeserializeRow; + protected transient VectorDeserializeRow smallTableValueVectorDeserializeRow; // This a 2nd batch with the same "column schema" as the big table batch that can be used to // build join output results in. If we can create some join output results in the big table @@ -207,6 +264,9 @@ protected void initLoggingPrefix(String className) { // Whether the native vectorized map join operator has performed its common setup. protected transient boolean needCommonSetup; + // Whether the native vectorized map join operator has performed its first batch setup. + protected transient boolean needFirstBatchSetup; + // Whether the native vectorized map join operator has performed its // native vector map join hash table setup. protected transient boolean needHashTableSetup; @@ -214,6 +274,9 @@ protected void initLoggingPrefix(String className) { // The small table hash table for the native vectorized map join operator. protected transient VectorMapJoinHashTable vectorMapJoinHashTable; + protected transient long batchCounter; + protected transient long rowCounter; + /** Kryo ctor. */ protected VectorMapJoinCommonOperator() { super(); @@ -246,9 +309,9 @@ public VectorMapJoinCommonOperator(CompilationOpContext ctx, OperatorDesc conf, posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]); isOuterJoin = !desc.getNoOuterJoin(); - Map> filterExpressions = desc.getFilters(); - bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get(posBigTable), - VectorExpressionDescriptor.Mode.FILTER); + vectorMapJoinVariation = this.vectorDesc.getVectorMapJoinVariation(); + hashTableKind = this.vectorDesc.getHashTableKind(); + hashTableKeyType = this.vectorDesc.getHashTableKeyType(); bigTableKeyColumnMap = vectorMapJoinInfo.getBigTableKeyColumnMap(); bigTableKeyColumnNames = vectorMapJoinInfo.getBigTableKeyColumnNames(); @@ -260,11 +323,19 @@ public VectorMapJoinCommonOperator(CompilationOpContext ctx, OperatorDesc conf, bigTableValueTypeInfos = vectorMapJoinInfo.getBigTableValueTypeInfos(); bigTableValueExpressions = vectorMapJoinInfo.getSlimmedBigTableValueExpressions(); - bigTableRetainedMapping = vectorMapJoinInfo.getBigTableRetainedMapping(); + bigTableFilterExpressions = vectorMapJoinInfo.getBigTableFilterExpressions(); + + bigTableRetainColumnMap = vectorMapJoinInfo.getBigTableRetainColumnMap(); + bigTableRetainTypeInfos = vectorMapJoinInfo.getBigTableRetainTypeInfos(); + + nonOuterSmallTableKeyColumnMap = vectorMapJoinInfo.getNonOuterSmallTableKeyColumnMap(); + nonOuterSmallTableKeyTypeInfos = vectorMapJoinInfo.getNonOuterSmallTableKeyTypeInfos(); + + outerSmallTableKeyMapping = vectorMapJoinInfo.getOuterSmallTableKeyMapping(); - bigTableOuterKeyMapping = vectorMapJoinInfo.getBigTableOuterKeyMapping(); + fullOuterSmallTableKeyMapping = vectorMapJoinInfo.getFullOuterSmallTableKeyMapping(); - smallTableMapping = vectorMapJoinInfo.getSmallTableMapping(); + smallTableValueMapping = vectorMapJoinInfo.getSmallTableValueMapping(); projectionMapping = vectorMapJoinInfo.getProjectionMapping(); @@ -273,47 +344,96 @@ public VectorMapJoinCommonOperator(CompilationOpContext ctx, OperatorDesc conf, protected void determineCommonInfo(boolean isOuter) throws HiveException { - bigTableOuterKeyOutputVectorColumns = 
bigTableOuterKeyMapping.getOutputColumns(); - smallTableOutputVectorColumns = smallTableMapping.getOutputColumns(); + outerSmallTableKeyColumnMap = outerSmallTableKeyMapping.getOutputColumns(); + + smallTableValueColumnMap = smallTableValueMapping.getOutputColumns(); // Which big table and small table columns are ByteColumnVector and need have their data buffer // to be manually reset for some join result processing? - bigTableByteColumnVectorColumns = getByteColumnVectorColumns(bigTableOuterKeyMapping); + bigTableByteColumnVectorColumns = + getByteColumnVectorColumns(bigTableRetainColumnMap, bigTableRetainTypeInfos); + + nonOuterSmallTableKeyByteColumnVectorColumns = + getByteColumnVectorColumns(nonOuterSmallTableKeyColumnMap, nonOuterSmallTableKeyTypeInfos); - smallTableByteColumnVectorColumns = getByteColumnVectorColumns(smallTableMapping); + outerSmallTableKeyByteColumnVectorColumns = + getByteColumnVectorColumns(outerSmallTableKeyMapping); + + smallTableByteColumnVectorColumns = + getByteColumnVectorColumns(smallTableValueMapping); outputProjection = projectionMapping.getOutputColumns(); outputTypeInfos = projectionMapping.getTypeInfos(); - if (LOG.isDebugEnabled()) { + if (LOG.isInfoEnabled()) { int[] orderDisplayable = new int[order.length]; for (int i = 0; i < order.length; i++) { orderDisplayable[i] = (int) order[i]; } - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor order " + Arrays.toString(orderDisplayable)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor posBigTable " + (int) posBigTable); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor posSingleVectorMapJoinSmallTable " + (int) posSingleVectorMapJoinSmallTable); - - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnMap " + Arrays.toString(bigTableKeyColumnMap)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnNames " + Arrays.toString(bigTableKeyColumnNames)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyTypeInfos " + Arrays.toString(bigTableKeyTypeInfos)); - - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueColumnMap " + Arrays.toString(bigTableValueColumnMap)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueColumnNames " + Arrays.toString(bigTableValueColumnNames)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueTypeNames " + Arrays.toString(bigTableValueTypeInfos)); - - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableRetainedMapping " + bigTableRetainedMapping.toString()); - - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableOuterKeyMapping " + bigTableOuterKeyMapping.toString()); - - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor smallTableMapping " + smallTableMapping.toString()); - - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableByteColumnVectorColumns " + Arrays.toString(bigTableByteColumnVectorColumns)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor smallTableByteColumnVectorColumns " + Arrays.toString(smallTableByteColumnVectorColumns)); + LOG.info(getLoggingPrefix() + " order " + + Arrays.toString(orderDisplayable)); + LOG.info(getLoggingPrefix() + " posBigTable " + + (int) posBigTable); + LOG.info(getLoggingPrefix() + " 
posSingleVectorMapJoinSmallTable " + + (int) posSingleVectorMapJoinSmallTable); + + LOG.info(getLoggingPrefix() + " bigTableKeyColumnMap " + + Arrays.toString(bigTableKeyColumnMap)); + LOG.info(getLoggingPrefix() + " bigTableKeyColumnNames " + + Arrays.toString(bigTableKeyColumnNames)); + LOG.info(getLoggingPrefix() + " bigTableKeyTypeInfos " + + Arrays.toString(bigTableKeyTypeInfos)); + + LOG.info(getLoggingPrefix() + " bigTableValueColumnMap " + + Arrays.toString(bigTableValueColumnMap)); + LOG.info(getLoggingPrefix() + " bigTableValueColumnNames " + + Arrays.toString(bigTableValueColumnNames)); + LOG.info(getLoggingPrefix() + " bigTableValueTypeNames " + + Arrays.toString(bigTableValueTypeInfos)); + + LOG.info(getLoggingPrefix() + " getBigTableRetainColumnMap " + + Arrays.toString(bigTableRetainColumnMap)); + LOG.info(getLoggingPrefix() + " bigTableRetainTypeInfos " + + Arrays.toString(bigTableRetainTypeInfos)); + + LOG.info(getLoggingPrefix() + " nonOuterSmallTableKeyColumnMap " + + Arrays.toString(nonOuterSmallTableKeyColumnMap)); + LOG.info(getLoggingPrefix() + " nonOuterSmallTableKeyTypeInfos " + + Arrays.toString(nonOuterSmallTableKeyTypeInfos)); + + LOG.info(getLoggingPrefix() + " outerSmallTableKeyMapping " + + outerSmallTableKeyMapping.toString()); + + LOG.info(getLoggingPrefix() + " fullOuterSmallTableKeyMapping " + + fullOuterSmallTableKeyMapping.toString()); + + LOG.info(getLoggingPrefix() + " smallTableValueMapping " + + smallTableValueMapping.toString()); + + LOG.info(getLoggingPrefix() + " bigTableByteColumnVectorColumns " + + Arrays.toString(bigTableByteColumnVectorColumns)); + LOG.info(getLoggingPrefix() + " smallTableByteColumnVectorColumns " + + Arrays.toString(smallTableByteColumnVectorColumns)); + + LOG.info(getLoggingPrefix() + " outputProjection " + + Arrays.toString(outputProjection)); + LOG.info(getLoggingPrefix() + " outputTypeInfos " + + Arrays.toString(outputTypeInfos)); + + LOG.info(getLoggingPrefix() + " mapJoinDesc.getKeysString " + + conf.getKeysString()); + if (conf.getValueIndices() != null) { + for (Entry entry : conf.getValueIndices().entrySet()) { + LOG.info(getLoggingPrefix() + " mapJoinDesc.getValueIndices +" + + (int) entry.getKey() + " " + Arrays.toString(entry.getValue())); + } + } + LOG.info(getLoggingPrefix() + " mapJoinDesc.getExprs " + + conf.getExprs().toString()); + LOG.info(getLoggingPrefix() + " mapJoinDesc.getRetainList " + + conf.getRetainList().toString()); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputProjection " + Arrays.toString(outputProjection)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputTypeInfos " + Arrays.toString(outputTypeInfos)); } setupVOutContext(conf.getOutputColumnNames()); @@ -323,11 +443,14 @@ protected void determineCommonInfo(boolean isOuter) throws HiveException { * Determine from a mapping which columns are BytesColumnVector columns. */ private int[] getByteColumnVectorColumns(VectorColumnMapping mapping) { + return getByteColumnVectorColumns(mapping.getOutputColumns(), mapping.getTypeInfos()); + } + + private int[] getByteColumnVectorColumns(int[] outputColumns, TypeInfo[] typeInfos) { + // Search mapping for any strings and return their output columns. 
ArrayList list = new ArrayList(); - int count = mapping.getCount(); - int[] outputColumns = mapping.getOutputColumns(); - TypeInfo[] typeInfos = mapping.getTypeInfos(); + final int count = outputColumns.length; for (int i = 0; i < count; i++) { int outputColumn = outputColumns[i]; String typeName = typeInfos[i].getTypeName(); @@ -345,10 +468,12 @@ protected void determineCommonInfo(boolean isOuter) throws HiveException { */ protected void setupVOutContext(List outputColumnNames) { if (LOG.isDebugEnabled()) { - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputColumnNames " + outputColumnNames); + LOG.debug(getLoggingPrefix() + " outputColumnNames " + outputColumnNames); } if (outputColumnNames.size() != outputProjection.length) { - throw new RuntimeException("Output column names " + outputColumnNames + " length and output projection " + Arrays.toString(outputProjection) + " / " + Arrays.toString(outputTypeInfos) + " length mismatch"); + throw new RuntimeException("Output column names " + outputColumnNames + + " length and output projection " + Arrays.toString(outputProjection) + + " / " + Arrays.toString(outputTypeInfos) + " length mismatch"); } vOutContext.resetProjectionColumns(); for (int i = 0; i < outputColumnNames.size(); ++i) { @@ -357,7 +482,8 @@ protected void setupVOutContext(List outputColumnNames) { vOutContext.addProjectionColumn(columnName, outputColumn); if (LOG.isDebugEnabled()) { - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor addProjectionColumn " + i + " columnName " + columnName + " outputColumn " + outputColumn); + LOG.debug(getLoggingPrefix() + " addProjectionColumn " + i + " columnName " + columnName + + " outputColumn " + outputColumn); } } } @@ -386,9 +512,59 @@ protected HashTableLoader getHashTableLoader(Configuration hconf) { return hashTableLoader; } + /* + * Do FULL OUTER MapJoin operator initialization. + */ + private void initializeFullOuterObjects() throws HiveException { + + // The Small Table key type info is the same as Big Table's. + TypeInfo[] smallTableKeyTypeInfos = bigTableKeyTypeInfos; + final int allKeysSize = smallTableKeyTypeInfos.length; + + /* + * The VectorMapJoinFullOuter[Intersect]{Long|MultiKey|String}Operator outputs 0, 1, or more + * Small Key columns in the join result. 
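+ * + * For example (hypothetical), with a two-column Small Table key where only the second key + * column is retained in the join output at batch column 8: + * allSmallTableKeyColumnNums = {-1, 8} + * allSmallTableKeyColumnIncluded = {false, true}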
+ */ + allSmallTableKeyColumnNums = new int[allKeysSize]; + Arrays.fill(allSmallTableKeyColumnNums, -1); + allSmallTableKeyColumnIncluded = new boolean[allKeysSize]; + + final int outputKeysSize = fullOuterSmallTableKeyMapping.getCount(); + int[] outputKeyNums = fullOuterSmallTableKeyMapping.getInputColumns(); + int[] outputKeyOutputColumns = fullOuterSmallTableKeyMapping.getOutputColumns(); + for (int i = 0; i < outputKeysSize; i++) { + final int outputKeyNum = outputKeyNums[i]; + allSmallTableKeyColumnNums[outputKeyNum] = outputKeyOutputColumns[i]; + allSmallTableKeyColumnIncluded[outputKeyNum] = true; + } + + if (hashTableKeyType == HashTableKeyType.MULTI_KEY && + outputKeysSize > 0) { + + smallTableKeyOuterVectorDeserializeRow = + new VectorDeserializeRow( + new BinarySortableDeserializeRead( + smallTableKeyTypeInfos, + /* useExternalBuffer */ true)); + smallTableKeyOuterVectorDeserializeRow.init( + allSmallTableKeyColumnNums, allSmallTableKeyColumnIncluded); + } + } + @Override protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER && + !conf.isDynamicPartitionHashJoin() && + !conf.isFullOuterIntersect()) { + + // The auxiliary forward sends first-time match keys to the FULL OUTER INTERSECT MapJoin + // operator. + auxiliaryChildIndex = 1; + } + VectorExpression.doTransientInit(bigTableFilterExpressions); VectorExpression.doTransientInit(bigTableKeyExpressions); VectorExpression.doTransientInit(bigTableValueExpressions); @@ -405,23 +581,34 @@ protected void initializeOp(Configuration hconf) throws HiveException { /* * Create our vectorized copy row and deserialize row helper objects. */ - if (smallTableMapping.getCount() > 0) { - smallTableVectorDeserializeRow = + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { + initializeFullOuterObjects(); + } + + if (smallTableValueMapping.getCount() > 0) { + smallTableValueVectorDeserializeRow = new VectorDeserializeRow( + new LazyBinaryDeserializeRead( - smallTableMapping.getTypeInfos(), + smallTableValueMapping.getTypeInfos(), /* useExternalBuffer */ true)); - smallTableVectorDeserializeRow.init(smallTableMapping.getOutputColumns()); + smallTableValueVectorDeserializeRow.init(smallTableValueMapping.getOutputColumns()); } - if (bigTableRetainedMapping.getCount() > 0) { + if (bigTableRetainColumnMap.length > 0) { bigTableRetainedVectorCopy = new VectorCopyRow(); - bigTableRetainedVectorCopy.init(bigTableRetainedMapping); + bigTableRetainedVectorCopy.init( + bigTableRetainColumnMap, bigTableRetainTypeInfos); + } + + if (nonOuterSmallTableKeyColumnMap.length > 0) { + nonOuterSmallTableKeyVectorCopy = new VectorCopyRow(); + nonOuterSmallTableKeyVectorCopy.init( + nonOuterSmallTableKeyColumnMap, nonOuterSmallTableKeyTypeInfos); } - if (bigTableOuterKeyMapping.getCount() > 0) { - bigTableVectorCopyOuterKeys = new VectorCopyRow(); - bigTableVectorCopyOuterKeys.init(bigTableOuterKeyMapping); + if (outerSmallTableKeyMapping.getCount() > 0) { + outerSmallTableKeyVectorCopy = new VectorCopyRow(); + outerSmallTableKeyVectorCopy.init(outerSmallTableKeyMapping); } /* @@ -430,6 +617,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { overflowBatch = setupOverflowBatch(); needCommonSetup = true; + needFirstBatchSetup = true; needHashTableSetup = true; if (LOG.isDebugEnabled()) { @@ -553,29 +741,46 @@ private void allocateOverflowBatchColumnVector(VectorizedRowBatch overflowBatch, } /* - * Common one time 
setup by native vectorized map join operator's processOp. + * Common one time setup for Native Vector MapJoin operator. */ - protected void commonSetup(VectorizedRowBatch batch) throws HiveException { + protected void commonSetup() throws HiveException { - if (LOG.isDebugEnabled()) { - LOG.debug("VectorMapJoinInnerCommonOperator commonSetup begin..."); - displayBatchColumns(batch, "batch"); - displayBatchColumns(overflowBatch, "overflowBatch"); + /* + * Make sure big table BytesColumnVectors have room for string values in the overflow batch... + */ + for (int column: bigTableByteColumnVectorColumns) { + BytesColumnVector bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column]; + bytesColumnVector.initBuffer(); } - // Make sure big table BytesColumnVectors have room for string values in the overflow batch... - for (int column: bigTableByteColumnVectorColumns) { + for (int column : nonOuterSmallTableKeyByteColumnVectorColumns) { + BytesColumnVector bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column]; + bytesColumnVector.initBuffer(); + } + + for (int column : outerSmallTableKeyByteColumnVectorColumns) { + BytesColumnVector bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column]; + bytesColumnVector.initBuffer(); + } + + for (int column: smallTableByteColumnVectorColumns) { BytesColumnVector bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column]; bytesColumnVector.initBuffer(); } + batchCounter = 0; + rowCounter = 0; + } + + /* + * Common one time setup by native vectorized map join operator's first batch. + */ + public void firstBatchSetup(VectorizedRowBatch batch) throws HiveException { // Make sure small table BytesColumnVectors have room for string values in the big table and // overflow batchs... for (int column: smallTableByteColumnVectorColumns) { BytesColumnVector bytesColumnVector = (BytesColumnVector) batch.cols[column]; bytesColumnVector.initBuffer(); - bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column]; - bytesColumnVector.initBuffer(); } // Setup a scratch batch that will be used to play back big table rows that were spilled @@ -583,6 +788,67 @@ protected void commonSetup(VectorizedRowBatch batch) throws HiveException { spillReplayBatch = VectorizedBatchUtil.makeLike(batch); } + /* + * Perform any Native Vector MapJoin operator specific hash table setup. + */ + public void hashTableSetup() throws HiveException { + } + + /* + * Perform the Native Vector MapJoin operator work. + */ + public abstract void processBatch(VectorizedRowBatch batch) throws HiveException; + + /* + * Common process method for all Native Vector MapJoin operators. + * + * Do common initialization work and invoke the override-able common setup methods. + * + * Then, invoke the processBatch override method to do the operator work. + */ + @Override + public void process(Object row, int tag) throws HiveException { + + VectorizedRowBatch batch = (VectorizedRowBatch) row; + alias = (byte) tag; + + if (needCommonSetup) { + + // Our one time process method initialization. + commonSetup(); + + needCommonSetup = false; + } + + if (needFirstBatchSetup) { + + // Our one time first-batch method initialization. + firstBatchSetup(batch); + + needFirstBatchSetup = false; + } + + if (needHashTableSetup) { + + // Setup our hash table specialization. It will be the first time the process + // method is called, or after a Hybrid Grace reload. 
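+ // (needHashTableSetup is presumably set back to true by the Hybrid Grace reload path so + // that the specialization can re-bind to the reloaded hash table.)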
+ + hashTableSetup(); + + needHashTableSetup = false; + } + + batchCounter++; + + if (batch.size == 0) { + return; + } + + rowCounter += batch.size; + + processBatch(batch); + } + protected void displayBatchColumns(VectorizedRowBatch batch, String batchName) { LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator commonSetup " + batchName + " column count " + batch.numCols); for (int column = 0; column < batch.numCols; column++) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectLongOperator.java new file mode 100644 index 0000000..867fe7a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectLongOperator.java @@ -0,0 +1,129 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +import java.io.IOException; + +// import org.slf4j.Logger; +// import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +// Single-Column Long specific imports. +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; + +/** + * Specialized class for doing a Native Vector FULL OUTER INTERSECT MapJoin on a Single-Column Long + * using a hash map. + */ +public class VectorMapJoinFullOuterIntersectLongOperator extends VectorMapJoinOuterLongOperator { + private static final long serialVersionUID = 1L; + + //------------------------------------------------------------------------------------------------ + + private static final String CLASS_NAME = VectorMapJoinFullOuterIntersectLongOperator.class.getName(); + // private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + /** Kryo ctor. 
*/ + protected VectorMapJoinFullOuterIntersectLongOperator() { + super(); + } + + public VectorMapJoinFullOuterIntersectLongOperator(CompilationOpContext ctx) { + super(ctx); + } + + public VectorMapJoinFullOuterIntersectLongOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + fullOuterIntersectHashTableSetup(); + } + + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { + + try { + + /* + * Only do key matching tracking here -- do not generate any results. + * + * On closeOp, generateFullOuterSmallTableNoMatches will generate the no-match Small Table + * results. + */ + + /* + * Single-Column Long specific declarations. + */ + + // The one join column for this specialized class. + LongColumnVector joinColVector = (LongColumnVector) batch.cols[singleJoinColumn]; + long[] vector = joinColVector.vector; + + if (batch.selectedInUse) { + + int[] selected = batch.selected; + final int logicalSize = batch.size; + for (int logicalIndex = 0; logicalIndex < logicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + + if (!hashMap.lookupNoResult( + vector[batchIndex], fullOuterIntersectReadPos, matchTracker)) { + + // Spill the row. + spillRow(batch, batchIndex, hashMap.spillPartitionId()); + } + } + } else { + + final int size = batch.size; + for (int batchIndex = 0; batchIndex < size; batchIndex++) { + + if (!hashMap.lookupNoResult( + vector[batchIndex], fullOuterIntersectReadPos, matchTracker)) { + + // Spill the row. + spillRow(batch, batchIndex, hashMap.spillPartitionId()); + } + } + } + + } catch (IOException e) { + throw new HiveException(e); + } catch (Exception e) { + throw new HiveException(e); + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectMultiKeyOperator.java new file mode 100644 index 0000000..75f0bd4 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectMultiKeyOperator.java @@ -0,0 +1,136 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +// import org.slf4j.Logger; +// import org.slf4j.LoggerFactory; +import java.io.IOException; + +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/** + * Specialized class for doing a Native Vector FULL OUTER INTERSECT MapJoin on a Multi-Key + * using a hash map. + */ +public class VectorMapJoinFullOuterIntersectMultiKeyOperator extends VectorMapJoinOuterMultiKeyOperator { + + private static final long serialVersionUID = 1L; + + //------------------------------------------------------------------------------------------------ + + private static final String CLASS_NAME = VectorMapJoinFullOuterIntersectMultiKeyOperator.class.getName(); + // private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + /** Kryo ctor. */ + protected VectorMapJoinFullOuterIntersectMultiKeyOperator() { + super(); + } + + public VectorMapJoinFullOuterIntersectMultiKeyOperator(CompilationOpContext ctx) { + super(ctx); + } + + public VectorMapJoinFullOuterIntersectMultiKeyOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + fullOuterIntersectHashTableSetup(); + } + + + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { + + try { + + /* + * Only do key matching tracking here -- do not generate any results. + * + * On closeOp, generateFullOuterSmallTableNoMatches will generate the no-match Small Table + * results. + */ + + /* + * Multi-Key specific declarations. + */ + + // None. + + if (batch.selectedInUse) { + + int[] selected = batch.selected; + final int logicalSize = batch.size; + for (int logicalIndex = 0; logicalIndex < logicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, batchIndex); + byte[] keyBytes = currentKeyOutput.getData(); + int keyLength = currentKeyOutput.getLength(); + if (!hashMap.lookupNoResult( + keyBytes, 0, keyLength, fullOuterIntersectReadPos, matchTracker)) { + + // Spill the row. + spillRow(batch, batchIndex, hashMap.spillPartitionId()); + } + } + } else { + + final int size = batch.size; + for (int batchIndex = 0; batchIndex < size; batchIndex++) { + + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, batchIndex); + byte[] keyBytes = currentKeyOutput.getData(); + int keyLength = currentKeyOutput.getLength(); + if (!hashMap.lookupNoResult( + keyBytes, 0, keyLength, fullOuterIntersectReadPos, matchTracker)) { + + // Spill the row. 
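+ // (Assumed semantics: lookupNoResult records the key match in matchTracker without + // materializing Small Table values; a miss here carries a spill partition id, meaning + // this Big Table row belongs to a spilled Hybrid Grace partition and must be written + // out for later replay.) 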
+ spillRow(batch, batchIndex, hashMap.spillPartitionId()); + } + } + } + + } catch (IOException e) { + throw new HiveException(e); + } catch (Exception e) { + throw new HiveException(e); + } + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectStringOperator.java new file mode 100644 index 0000000..eed4900 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectStringOperator.java @@ -0,0 +1,138 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +// import org.slf4j.Logger; +// import org.slf4j.LoggerFactory; +import java.io.IOException; + +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/** + * Specialized class for doing a Native Vector FULL OUTER INTERSECT MapJoin on a Single-Column + * String using a hash map. + */ +public class VectorMapJoinFullOuterIntersectStringOperator extends VectorMapJoinOuterStringOperator { + + private static final long serialVersionUID = 1L; + + //------------------------------------------------------------------------------------------------ + + private static final String CLASS_NAME = VectorMapJoinFullOuterIntersectStringOperator.class.getName(); + // private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + /** Kryo ctor. */ + protected VectorMapJoinFullOuterIntersectStringOperator() { + super(); + } + + public VectorMapJoinFullOuterIntersectStringOperator(CompilationOpContext ctx) { + super(ctx); + } + + public VectorMapJoinFullOuterIntersectStringOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + fullOuterIntersectHashTableSetup(); + } + + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { + + try { + + /* + * Only do key matching tracking here -- do not generate any results. 
+ * + * On closeOp, generateFullOuterSmallTableNoMatches will generate the no-match Small Table + * results. + */ + + /* + * Single-Column String specific declarations. + */ + + // The one join column for this specialized class. + BytesColumnVector joinColVector = (BytesColumnVector) batch.cols[singleJoinColumn]; + byte[][] vector = joinColVector.vector; + int[] start = joinColVector.start; + int[] length = joinColVector.length; + + if (batch.selectedInUse) { + + int[] selected = batch.selected; + final int logicalSize = batch.size; + for (int logicalIndex = 0; logicalIndex < logicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + + byte[] keyBytes = vector[batchIndex]; + int keyStart = start[batchIndex]; + int keyLength = length[batchIndex]; + + if (!hashMap.lookupNoResult( + keyBytes, keyStart, keyLength, fullOuterIntersectReadPos, matchTracker)) { + + // Spill the row. + spillRow(batch, batchIndex, hashMap.spillPartitionId()); + } + } + } else { + + final int size = batch.size; + for (int batchIndex = 0; batchIndex < size; batchIndex++) { + + byte[] keyBytes = vector[batchIndex]; + int keyStart = start[batchIndex]; + int keyLength = length[batchIndex]; + + if (!hashMap.lookupNoResult( + keyBytes, keyStart, keyLength, fullOuterIntersectReadPos, matchTracker)) { + + // Spill the row. + spillRow(batch, batchIndex, hashMap.spillPartitionId()); + } + } + } + } catch (IOException e) { + throw new HiveException(e); + } catch (Exception e) { + throw new HiveException(e); + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterLongOperator.java new file mode 100644 index 0000000..0a16509 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterLongOperator.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +// import org.slf4j.Logger; +// import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/** + * Specialized class for doing a Native Vector FULL OUTER MapJoin on a Single-Column Long + * using a hash map. 
+ */ +public class VectorMapJoinFullOuterLongOperator extends VectorMapJoinOuterLongOperator { + private static final long serialVersionUID = 1L; + + //------------------------------------------------------------------------------------------------ + + private static final String CLASS_NAME = VectorMapJoinFullOuterLongOperator.class.getName(); + // private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + /** Kryo ctor. */ + protected VectorMapJoinFullOuterLongOperator() { + super(); + } + + public VectorMapJoinFullOuterLongOperator(CompilationOpContext ctx) { + super(ctx); + } + + public VectorMapJoinFullOuterLongOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + // Turn on key matching, and for the shared Small-Table case, setup forwarding to Intersect. + fullOuterHashTableSetup(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterMultiKeyOperator.java new file mode 100644 index 0000000..4953cc2 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterMultiKeyOperator.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +// import org.slf4j.Logger; +// import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/** + * Specialized class for doing a Native Vector FULL OUTER MapJoin on a Multi-Key + * using a hash map. 
+ */ +public class VectorMapJoinFullOuterMultiKeyOperator extends VectorMapJoinOuterMultiKeyOperator { + + private static final long serialVersionUID = 1L; + + //------------------------------------------------------------------------------------------------ + + private static final String CLASS_NAME = VectorMapJoinFullOuterMultiKeyOperator.class.getName(); + // private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + /** Kryo ctor. */ + protected VectorMapJoinFullOuterMultiKeyOperator() { + super(); + } + + public VectorMapJoinFullOuterMultiKeyOperator(CompilationOpContext ctx) { + super(ctx); + } + + public VectorMapJoinFullOuterMultiKeyOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + // Turn on key matching, and for the shared Small-Table case, setup forwarding to Intersect. + fullOuterHashTableSetup(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterStringOperator.java new file mode 100644 index 0000000..4572374 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterStringOperator.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +// import org.slf4j.Logger; +// import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/** + * Specialized class for doing a Native Vector FULL OUTER MapJoin on a Single-Column String + * using a hash map. 
+ */ +public class VectorMapJoinFullOuterStringOperator extends VectorMapJoinOuterStringOperator { + + private static final long serialVersionUID = 1L; + + //------------------------------------------------------------------------------------------------ + + private static final String CLASS_NAME = VectorMapJoinFullOuterStringOperator.class.getName(); + // private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + /** Kryo ctor. */ + protected VectorMapJoinFullOuterStringOperator() { + super(); + } + + public VectorMapJoinFullOuterStringOperator(CompilationOpContext ctx) { + super(ctx); + } + + public VectorMapJoinFullOuterStringOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + // Turn on key matching, and for the shared Small-Table case, setup forwarding to Intersect. + fullOuterHashTableSetup(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java index 92ec1ee..b48a481 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java @@ -30,7 +30,6 @@ import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer.HashPartition; -import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; @@ -93,9 +92,6 @@ private transient Thread ownThread; private transient int interruptCheckCounter = CHECK_INTERRUPT_PER_OVERFLOW_BATCHES; - // Debug display. - protected transient long batchCounter; - /** Kryo ctor. 
*/ protected VectorMapJoinGenerateResultOperator() { super(); @@ -124,13 +120,6 @@ private void setUpInterruptChecking() { ownThread = Thread.currentThread(); } - protected void commonSetup(VectorizedRowBatch batch) throws HiveException { - super.commonSetup(batch); - - batchCounter = 0; - - } - //------------------------------------------------------------------------------------------------ protected void performValueExpressions(VectorizedRowBatch batch, @@ -157,24 +146,24 @@ protected void performValueExpressions(VectorizedRowBatch batch, batch.selectedInUse = saveSelectedInUse; } - protected void doSmallTableDeserializeRow(VectorizedRowBatch batch, int batchIndex, + protected void doSmallTableValueDeserializeRow(VectorizedRowBatch batch, int batchIndex, ByteSegmentRef byteSegmentRef, VectorMapJoinHashMapResult hashMapResult) throws HiveException { byte[] bytes = byteSegmentRef.getBytes(); int offset = (int) byteSegmentRef.getOffset(); int length = byteSegmentRef.getLength(); - smallTableVectorDeserializeRow.setBytes(bytes, offset, length); + smallTableValueVectorDeserializeRow.setBytes(bytes, offset, length); try { // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc. - smallTableVectorDeserializeRow.deserializeByRef(batch, batchIndex); + smallTableValueVectorDeserializeRow.deserializeByRef(batch, batchIndex); } catch (Exception e) { throw new HiveException( "\nHashMapResult detail: " + hashMapResult.getDetailedHashMapResultPositionString() + "\nDeserializeRead detail: " + - smallTableVectorDeserializeRow.getDetailedReadPositionString(), + smallTableValueVectorDeserializeRow.getDetailedReadPositionString(), e); } } @@ -215,22 +204,23 @@ protected int generateHashMapResultSingleValue(VectorizedRowBatch batch, for (int i = 0; i < duplicateCount; i++) { - int batchIndex = allMatchs[allMatchesIndex + i]; + final int batchIndex = allMatchs[allMatchesIndex + i]; - // Outer key copying is only used when we are using the input BigTable batch as the output. - // - if (bigTableVectorCopyOuterKeys != null) { - // Copy within row. - bigTableVectorCopyOuterKeys.copyByReference(batch, batchIndex, batch, batchIndex); + if (outerSmallTableKeyVectorCopy != null) { + + // For [FULL] OUTER MapJoin, copy Big Table keys to Small Table area within + // same batch by reference. + // + outerSmallTableKeyVectorCopy.copyByReference( + batch, batchIndex, + batch, batchIndex); } - if (smallTableVectorDeserializeRow != null) { - doSmallTableDeserializeRow(batch, batchIndex, + if (smallTableValueVectorDeserializeRow != null) { + doSmallTableValueDeserializeRow(batch, batchIndex, byteSegmentRef, hashMapResult); } - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, "generateHashMapResultSingleValue big table"); - // Use the big table row as output. batch.selected[numSel++] = batchIndex; } @@ -273,26 +263,45 @@ protected void generateHashMapResultMultiValue(VectorizedRowBatch batch, for (int i = 0; i < duplicateCount; i++) { - int batchIndex = allMatchs[allMatchesIndex + i]; + final int batchIndex = allMatchs[allMatchesIndex + i]; ByteSegmentRef byteSegmentRef = hashMapResult.first(); while (byteSegmentRef != null) { // Copy the BigTable values into the overflow batch. Since the overflow batch may // not get flushed here, we must copy by value. - // Note this includes any outer join keys that need to go into the small table "area". 
+ // if (bigTableRetainedVectorCopy != null) { - bigTableRetainedVectorCopy.copyByValue(batch, batchIndex, - overflowBatch, overflowBatch.size); + bigTableRetainedVectorCopy.copyByValue( + batch, batchIndex, + overflowBatch, overflowBatch.size); } - if (smallTableVectorDeserializeRow != null) { + if (nonOuterSmallTableKeyVectorCopy != null) { - doSmallTableDeserializeRow(overflowBatch, overflowBatch.size, - byteSegmentRef, hashMapResult); + // For non-[FULL] OUTER MapJoin, copy the non-retained Big Table keys across to the Big + // Table area of the overflow batch by value so the Small Table key projection will see + // its keys. + // + nonOuterSmallTableKeyVectorCopy.copyByValue( + batch, batchIndex, + overflowBatch, overflowBatch.size); } - // VectorizedBatchUtil.debugDisplayOneRow(overflowBatch, overflowBatch.size, "generateHashMapResultMultiValue overflow"); + if (outerSmallTableKeyVectorCopy != null) { + + // For [FULL] OUTER MapJoin, copy the Big Table keys across to the Small Table area of + // the overflow batch by value. + // + outerSmallTableKeyVectorCopy.copyByValue( + batch, batchIndex, + overflowBatch, overflowBatch.size); + } + + if (smallTableValueVectorDeserializeRow != null) { + + doSmallTableValueDeserializeRow(overflowBatch, overflowBatch.size, + byteSegmentRef, hashMapResult); + } overflowBatch.size++; if (overflowBatch.size == overflowBatch.DEFAULT_SIZE) { @@ -333,8 +342,8 @@ private void generateHashMapResultLargeMultiValue(VectorizedRowBatch batch, // Fill up as much of the overflow batch as possible with small table values. while (byteSegmentRef != null) { - if (smallTableVectorDeserializeRow != null) { - doSmallTableDeserializeRow(overflowBatch, overflowBatch.size, + if (smallTableValueVectorDeserializeRow != null) { + doSmallTableValueDeserializeRow(overflowBatch, overflowBatch.size, byteSegmentRef, hashMapResult); } @@ -361,9 +370,40 @@ private void generateHashMapResultLargeMultiValue(VectorizedRowBatch batch, int batchIndex = allMatchs[allMatchesIndex + i]; if (bigTableRetainedVectorCopy != null) { + // The one big table row's values repeat. - bigTableRetainedVectorCopy.copyByReference(batch, batchIndex, overflowBatch, 0); - for (int column : bigTableRetainedMapping.getOutputColumns()) { + bigTableRetainedVectorCopy.copyByReference( + batch, batchIndex, + overflowBatch, 0); + for (int column : bigTableRetainColumnMap) { + overflowBatch.cols[column].isRepeating = true; + } + } + + if (nonOuterSmallTableKeyVectorCopy != null) { + + // For non-[FULL] OUTER MapJoin, copy the non-retained Big Table keys across to the Big + // Table area of the overflow batch by value so the Small Table key projection will see + // its keys. + // + nonOuterSmallTableKeyVectorCopy.copyByValue( + batch, batchIndex, + overflowBatch, 0); + for (int column : nonOuterSmallTableKeyColumnMap) { + overflowBatch.cols[column].isRepeating = true; + } + } + + int[] outerSmallTableKeyColumnMap = null; + if (outerSmallTableKeyVectorCopy != null) { + + // For [FULL] OUTER MapJoin, copy the Big Table keys across to the Small Table area of + // the overflow batch by value. + // + outerSmallTableKeyVectorCopy.copyByValue( + batch, batchIndex, + overflowBatch, 0); + outerSmallTableKeyColumnMap = outerSmallTableKeyMapping.getOutputColumns(); + for (int column : outerSmallTableKeyColumnMap) { overflowBatch.cols[column].isRepeating = true; } } @@ -373,10 +413,20 @@ private void generateHashMapResultLargeMultiValue(VectorizedRowBatch batch, forwardOverflowNoReset(); // Hand reset the big table columns. 
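+ // (The overflow batch was forwarded above without being reset, so the columns marked + // isRepeating for this Big Table row are cleared by hand before the batch is refilled.) 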
- for (int column : bigTableRetainedMapping.getOutputColumns()) { + for (int column : bigTableRetainColumnMap) { + ColumnVector colVector = overflowBatch.cols[column]; + colVector.reset(); + } + for (int column : nonOuterSmallTableKeyColumnMap) { ColumnVector colVector = overflowBatch.cols[column]; colVector.reset(); } + if (outerSmallTableKeyColumnMap != null) { + for (int column : outerSmallTableKeyColumnMap) { + ColumnVector colVector = overflowBatch.cols[column]; + colVector.reset(); + } + } } byteSegmentRef = hashMapResult.next(); @@ -476,22 +526,16 @@ private void setupSpillSerDe(VectorizedRowBatch batch) throws HiveException { } private void spillSerializeRow(VectorizedRowBatch batch, int batchIndex, - VectorMapJoinHashTableResult hashTableResult) throws IOException { - - int partitionId = hashTableResult.spillPartitionId(); + int partitionId) throws IOException { HybridHashTableContainer ht = (HybridHashTableContainer) mapJoinTables[posSingleVectorMapJoinSmallTable]; HashPartition hp = ht.getHashPartitions()[partitionId]; VectorRowBytesContainer rowBytesContainer = hp.getMatchfileRowBytesContainer(); Output output = rowBytesContainer.getOuputForRowBytes(); -// int offset = output.getLength(); bigTableVectorSerializeRow.setOutputAppend(output); bigTableVectorSerializeRow.serializeWrite(batch, batchIndex); -// int length = output.getLength() - offset; rowBytesContainer.finishRow(); - -// LOG.debug("spillSerializeRow spilled batchIndex " + batchIndex + ", length " + length); } protected void spillHashMapBatch(VectorizedRowBatch batch, @@ -509,8 +553,18 @@ protected void spillHashMapBatch(VectorizedRowBatch batch, int hashTableResultIndex = spillHashTableResultIndices[i]; VectorMapJoinHashTableResult hashTableResult = hashTableResults[hashTableResultIndex]; - spillSerializeRow(batch, batchIndex, hashTableResult); + spillSerializeRow(batch, batchIndex, hashTableResult.spillPartitionId()); + } + } + + protected void spillRow(VectorizedRowBatch batch, int batchIndex, int partitionId) + throws HiveException, IOException { + + if (bigTableVectorSerializeRow == null) { + setupSpillSerDe(batch); } + + spillSerializeRow(batch, batchIndex, partitionId); } protected void spillBatchRepeated(VectorizedRowBatch batch, @@ -525,7 +579,7 @@ protected void spillBatchRepeated(VectorizedRowBatch batch, for (int logical = 0; logical < batch.size; logical++) { int batchIndex = (selectedInUse ? selected[logical] : logical); - spillSerializeRow(batch, batchIndex, hashTableResult); + spillSerializeRow(batch, batchIndex, hashTableResult.spillPartitionId()); } } @@ -541,8 +595,8 @@ protected void reloadHashTable(byte pos, int partitionId) MapJoinTableContainer smallTable = spilledMapJoinTables[pos]; - vectorMapJoinHashTable = VectorMapJoinOptimizedCreateHashTable.createHashTable(conf, - smallTable); + vectorMapJoinHashTable = + VectorMapJoinOptimizedCreateHashTable.createHashTable(conf, smallTable); needHashTableSetup = true; LOG.info("Created " + vectorMapJoinHashTable.getClass().getSimpleName() + " from " + this.getClass().getSimpleName()); @@ -637,7 +691,7 @@ public void forwardBigTableBatch(VectorizedRowBatch batch) throws HiveException batch.projectionSize = outputProjection.length; batch.projectedColumns = outputProjection; - forward(batch, null, true); + vectorForward(batch); // Revert the projected columns back, because batch can be re-used by our parent operators. 
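+ // (vectorForward above delivered the batch to the children under the MapJoin output + // projection; restoring the original projection keeps the batch valid for its owner.) 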
batch.projectionSize = originalProjectionSize; @@ -649,7 +703,7 @@ public void forwardBigTableBatch(VectorizedRowBatch batch) throws HiveException * Forward the overflow batch and reset the batch. */ protected void forwardOverflow() throws HiveException { - forward(overflowBatch, null, true); + vectorForward(overflowBatch); overflowBatch.reset(); maybeCheckInterrupt(); } @@ -666,7 +720,7 @@ private void maybeCheckInterrupt() throws HiveException { * Forward the overflow batch, but do not reset the batch. */ private void forwardOverflowNoReset() throws HiveException { - forward(overflowBatch, null, true); + vectorForward(overflowBatch); } /* @@ -679,6 +733,11 @@ private void forwardOverflowNoReset() throws HiveException { @Override public void closeOp(boolean aborted) throws HiveException { super.closeOp(aborted); + + // NOTE: The closeOp call on super MapJoinOperator can trigger Hybrid Grace additional + // NOTE: processing and also FULL OUTER MapJoin non-match Small Table result generation. So, + // NOTE: we flush the overflowBatch after the call. + // if (!aborted && overflowBatch.size > 0) { forwardOverflow(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java index f791d95..35ddddd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java @@ -103,25 +103,25 @@ public VectorMapJoinInnerBigOnlyGenerateResultOperator(CompilationOpContext ctx, /* * Setup our inner big table only join specific members. */ - protected void commonSetup(VectorizedRowBatch batch) throws HiveException { - super.commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); // Inner big-table only join specific. 
VectorMapJoinHashMultiSet baseHashMultiSet = (VectorMapJoinHashMultiSet) vectorMapJoinHashTable; - hashMultiSetResults = new VectorMapJoinHashMultiSetResult[batch.DEFAULT_SIZE]; + hashMultiSetResults = new VectorMapJoinHashMultiSetResult[VectorizedRowBatch.DEFAULT_SIZE]; for (int i = 0; i < hashMultiSetResults.length; i++) { hashMultiSetResults[i] = baseHashMultiSet.createHashMultiSetResult(); } - allMatchs = new int[batch.DEFAULT_SIZE]; + allMatchs = new int[VectorizedRowBatch.DEFAULT_SIZE]; - equalKeySeriesValueCounts = new long[batch.DEFAULT_SIZE]; - equalKeySeriesAllMatchIndices = new int[batch.DEFAULT_SIZE]; - equalKeySeriesDuplicateCounts = new int[batch.DEFAULT_SIZE]; + equalKeySeriesValueCounts = new long[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesAllMatchIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesDuplicateCounts = new int[VectorizedRowBatch.DEFAULT_SIZE]; - spills = new int[batch.DEFAULT_SIZE]; - spillHashMapResultIndices = new int[batch.DEFAULT_SIZE]; + spills = new int[VectorizedRowBatch.DEFAULT_SIZE]; + spillHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; } //----------------------------------------------------------------------------------------------- diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java index 678fa42..30a19b8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java @@ -102,45 +102,36 @@ public VectorMapJoinInnerBigOnlyLongOperator(CompilationOpContext ctx, OperatorD // @Override - public void process(Object row, int tag) throws HiveException { + protected void commonSetup() throws HiveException { + super.commonSetup(); - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); - - /* - * Initialize Single-Column Long members for this specialized class. - */ + /* + * Initialize Single-Column Long members for this specialized class. + */ - singleJoinColumn = bigTableKeyColumnMap[0]; - - needCommonSetup = false; - } - - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. - - /* - * Get our Single-Column Long hash multi-set information for this specialized class. - */ + singleJoinColumn = bigTableKeyColumnMap[0]; + } - hashMultiSet = (VectorMapJoinLongHashMultiSet) vectorMapJoinHashTable; - useMinMax = hashMultiSet.useMinMax(); - if (useMinMax) { - min = hashMultiSet.min(); - max = hashMultiSet.max(); - } + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + /* + * Get our Single-Column Long hash multi-set information for this specialized class. + */ + + hashMultiSet = (VectorMapJoinLongHashMultiSet) vectorMapJoinHashTable; + useMinMax = hashMultiSet.useMinMax(); + if (useMinMax) { + min = hashMultiSet.min(); + max = hashMultiSet.max(); + } + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an inner big-only join. 
@@ -153,11 +144,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java index 866aa60..f587517 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Multi-Key hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMultiSet; @@ -109,45 +108,40 @@ public VectorMapJoinInnerBigOnlyMultiKeyOperator(CompilationOpContext ctx, Opera // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; + protected void commonSetup() throws HiveException { + super.commonSetup(); - alias = (byte) tag; + /* + * Initialize Multi-Key members for this specialized class. + */ - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); - - /* - * Initialize Multi-Key members for this specialized class. - */ + keyVectorSerializeWrite = new VectorSerializeRow( + new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); - keyVectorSerializeWrite = new VectorSerializeRow( - new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); + currentKeyOutput = new Output(); + saveKeyOutput = new Output(); + } - currentKeyOutput = new Output(); - saveKeyOutput = new Output(); + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - needCommonSetup = false; - } - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + /* + * Get our Multi-Key hash multi-set information for this specialized class. + */ - /* - * Get our Multi-Key hash multi-set information for this specialized class. - */ - - hashMultiSet = (VectorMapJoinBytesHashMultiSet) vectorMapJoinHashTable; + hashMultiSet = (VectorMapJoinBytesHashMultiSet) vectorMapJoinHashTable; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an inner big-only join. 
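+ // (Multi-Key variant: commonSetup wires keyVectorSerializeWrite with + // BinarySortableSerializeWrite so each Big Table key is serialized into currentKeyOutput + // before probing the bytes-keyed hash multi-set.) 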
@@ -160,11 +154,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java index a0c3b9c..e373db1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column String hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMultiSet; @@ -98,40 +97,31 @@ public VectorMapJoinInnerBigOnlyStringOperator(CompilationOpContext ctx, Operato // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); - /* - * Initialize Single-Column String members for this specialized class. - */ - - singleJoinColumn = bigTableKeyColumnMap[0]; + /* + * Initialize Single-Column String members for this specialized class. + */ - needCommonSetup = false; - } + singleJoinColumn = bigTableKeyColumnMap[0]; + } - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - /* - * Get our Single-Column String hash multi-set information for this specialized class. - */ + /* + * Get our Single-Column String hash multi-set information for this specialized class. + */ - hashMultiSet = (VectorMapJoinBytesHashMultiSet) vectorMapJoinHashTable; + hashMultiSet = (VectorMapJoinBytesHashMultiSet) vectorMapJoinHashTable; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an inner big-only join. 
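+ // (Single-Column String variant: lookups use the key BytesColumnVector's bytes, start, + // and length arrays directly, so no per-row key serialization pass is needed.) 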
@@ -144,11 +134,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java index ea2c04d..dc5d046 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java @@ -108,26 +108,26 @@ public VectorMapJoinInnerGenerateResultOperator(CompilationOpContext ctx, Operat /* * Setup our inner join specific members. */ - protected void commonSetup(VectorizedRowBatch batch) throws HiveException { - super.commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); // Inner join specific. VectorMapJoinHashMap baseHashMap = (VectorMapJoinHashMap) vectorMapJoinHashTable; - hashMapResults = new VectorMapJoinHashMapResult[batch.DEFAULT_SIZE]; + hashMapResults = new VectorMapJoinHashMapResult[VectorizedRowBatch.DEFAULT_SIZE]; for (int i = 0; i < hashMapResults.length; i++) { hashMapResults[i] = baseHashMap.createHashMapResult(); } - allMatchs = new int[batch.DEFAULT_SIZE]; + allMatchs = new int[VectorizedRowBatch.DEFAULT_SIZE]; - equalKeySeriesHashMapResultIndices = new int[batch.DEFAULT_SIZE]; - equalKeySeriesAllMatchIndices = new int[batch.DEFAULT_SIZE]; - equalKeySeriesIsSingleValue = new boolean[batch.DEFAULT_SIZE]; - equalKeySeriesDuplicateCounts = new int[batch.DEFAULT_SIZE]; + equalKeySeriesHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesAllMatchIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesIsSingleValue = new boolean[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesDuplicateCounts = new int[VectorizedRowBatch.DEFAULT_SIZE]; - spills = new int[batch.DEFAULT_SIZE]; - spillHashMapResultIndices = new int[batch.DEFAULT_SIZE]; + spills = new int[VectorizedRowBatch.DEFAULT_SIZE]; + spillHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; } /* @@ -142,7 +142,7 @@ protected void innerPerBatchSetup(VectorizedRowBatch batch) { // For join operators that can generate small table results, reset their // (target) scratch columns. - for (int column : smallTableOutputVectorColumns) { + for (int column : smallTableValueColumnMap) { ColumnVector smallTableColumn = batch.cols[column]; smallTableColumn.reset(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java index 36404bc..5ac606a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java @@ -30,7 +30,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column Long hash table import. 
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap; @@ -101,45 +100,36 @@ public VectorMapJoinInnerLongOperator(CompilationOpContext ctx, OperatorDesc con // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); - - /* - * Initialize Single-Column Long members for this specialized class. - */ - - singleJoinColumn = bigTableKeyColumnMap[0]; + protected void commonSetup() throws HiveException { + super.commonSetup(); - needCommonSetup = false; - } - - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + /* + * Initialize Single-Column Long members for this specialized class. + */ - /* - * Get our Single-Column Long hash map information for this specialized class. - */ + singleJoinColumn = bigTableKeyColumnMap[0]; + } - hashMap = (VectorMapJoinLongHashMap) vectorMapJoinHashTable; - useMinMax = hashMap.useMinMax(); - if (useMinMax) { - min = hashMap.min(); - max = hashMap.max(); - } + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + /* + * Get our Single-Column Long hash map information for this specialized class. + */ + + hashMap = (VectorMapJoinLongHashMap) vectorMapJoinHashTable; + useMinMax = hashMap.useMinMax(); + if (useMinMax) { + min = hashMap.min(); + max = hashMap.max(); + } + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an inner join. @@ -151,11 +141,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java index 620101f..cdee3fd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java @@ -30,7 +30,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Multi-Key hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; @@ -107,45 +106,36 @@ public VectorMapJoinInnerMultiKeyOperator(CompilationOpContext ctx, OperatorDesc // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); - /* - * Initialize Multi-Key members for this specialized class. 
- */ - - keyVectorSerializeWrite = new VectorSerializeRow( - new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); + /* + * Initialize Multi-Key members for this specialized class. + */ - currentKeyOutput = new Output(); - saveKeyOutput = new Output(); + keyVectorSerializeWrite = new VectorSerializeRow( + new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); - needCommonSetup = false; - } + currentKeyOutput = new Output(); + saveKeyOutput = new Output(); + } - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - /* - * Get our Multi-Key hash map information for this specialized class. - */ + /* + * Get our Multi-Key hash map information for this specialized class. + */ - hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an inner join. @@ -157,11 +147,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java index d99d514..8e6697e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java @@ -30,7 +30,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column String hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; @@ -97,40 +96,31 @@ public VectorMapJoinInnerStringOperator(CompilationOpContext ctx, OperatorDesc c // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); - /* - * Initialize Single-Column String members for this specialized class. - */ - - singleJoinColumn = bigTableKeyColumnMap[0]; + /* + * Initialize Single-Column String members for this specialized class. + */ - needCommonSetup = false; - } + singleJoinColumn = bigTableKeyColumnMap[0]; + } - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - /* - * Get our Single-Column String hash map information for this specialized class. 
- */ + /* + * Get our Single-Column String hash map information for this specialized class. + */ - hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an inner join. @@ -142,11 +132,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java index f68d4c4..71ec56b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java @@ -89,21 +89,21 @@ public VectorMapJoinLeftSemiGenerateResultOperator(CompilationOpContext ctx, Ope /* * Setup our left semi join specific members. */ - protected void commonSetup(VectorizedRowBatch batch) throws HiveException { - super.commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); // Semi join specific. VectorMapJoinHashSet baseHashSet = (VectorMapJoinHashSet) vectorMapJoinHashTable; - hashSetResults = new VectorMapJoinHashSetResult[batch.DEFAULT_SIZE]; + hashSetResults = new VectorMapJoinHashSetResult[VectorizedRowBatch.DEFAULT_SIZE]; for (int i = 0; i < hashSetResults.length; i++) { hashSetResults[i] = baseHashSet.createHashSetResult(); } - allMatchs = new int[batch.DEFAULT_SIZE]; + allMatchs = new int[VectorizedRowBatch.DEFAULT_SIZE]; - spills = new int[batch.DEFAULT_SIZE]; - spillHashMapResultIndices = new int[batch.DEFAULT_SIZE]; + spills = new int[VectorizedRowBatch.DEFAULT_SIZE]; + spillHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; } //----------------------------------------------------------------------------------------------- diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java index 4185c5b..40e7cfa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column Long hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashSet; @@ -102,45 +101,36 @@ public VectorMapJoinLeftSemiLongOperator(CompilationOpContext ctx, OperatorDesc // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. 
- commonSetup(batch); - - /* - * Initialize Single-Column Long members for this specialized class. - */ - - singleJoinColumn = bigTableKeyColumnMap[0]; + protected void commonSetup() throws HiveException { + super.commonSetup(); - needCommonSetup = false; - } - - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + /* + * Initialize Single-Column Long members for this specialized class. + */ - /* - * Get our Single-Column Long hash set information for this specialized class. - */ + singleJoinColumn = bigTableKeyColumnMap[0]; + } - hashSet = (VectorMapJoinLongHashSet) vectorMapJoinHashTable; - useMinMax = hashSet.useMinMax(); - if (useMinMax) { - min = hashSet.min(); - max = hashSet.max(); - } + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + /* + * Get our Single-Column Long hash set information for this specialized class. + */ + + hashSet = (VectorMapJoinLongHashSet) vectorMapJoinHashTable; + useMinMax = hashSet.useMinMax(); + if (useMinMax) { + min = hashSet.min(); + max = hashSet.max(); + } + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an left semi join. @@ -153,11 +143,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java index 541e7fa..e5d9fda 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Multi-Key hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashSet; @@ -108,45 +107,36 @@ public VectorMapJoinLeftSemiMultiKeyOperator(CompilationOpContext ctx, OperatorD // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); - /* - * Initialize Multi-Key members for this specialized class. - */ - - keyVectorSerializeWrite = new VectorSerializeRow( - new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); + /* + * Initialize Multi-Key members for this specialized class. 
+ */ - currentKeyOutput = new Output(); - saveKeyOutput = new Output(); + keyVectorSerializeWrite = new VectorSerializeRow( + new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); - needCommonSetup = false; - } + currentKeyOutput = new Output(); + saveKeyOutput = new Output(); + } - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - /* - * Get our Multi-Key hash set information for this specialized class. - */ + /* + * Get our Multi-Key hash set information for this specialized class. + */ - hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable; + hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an left semi join. @@ -159,11 +149,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java index 6785bce..df900a1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column String hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashSet; @@ -98,40 +97,31 @@ public VectorMapJoinLeftSemiStringOperator(CompilationOpContext ctx, OperatorDes // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); - /* - * Initialize Single-Column String members for this specialized class. - */ - - singleJoinColumn = bigTableKeyColumnMap[0]; + /* + * Initialize Single-Column String members for this specialized class. + */ - needCommonSetup = false; - } + singleJoinColumn = bigTableKeyColumnMap[0]; + } - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - /* - * Get our Single-Column String hash set information for this specialized class. - */ + /* + * Get our Single-Column String hash set information for this specialized class. 
+ */ - hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable; + hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an left semi join. @@ -144,11 +134,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java index 8a6c817..061253f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java @@ -24,16 +24,23 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.WriteBuffers; import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; /** @@ -114,6 +121,11 @@ protected transient int[] noMatchs; protected transient int[] merged; + protected transient boolean isFullOuterForwardKeysToIntersect; + protected transient WriteBuffers.Position fullOuterIntersectReadPos; + + protected transient int[] fullOuterForwardKeys; + /** Kryo ctor. */ protected VectorMapJoinOuterGenerateResultOperator() { super(); @@ -131,32 +143,37 @@ public VectorMapJoinOuterGenerateResultOperator(CompilationOpContext ctx, Operat /* * Setup our outer join specific members. */ - protected void commonSetup(VectorizedRowBatch batch) throws HiveException { - super.commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); // Outer join specific. 
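A small cleanup repeated in these hunks replaces batch.DEFAULT_SIZE with VectorizedRowBatch.DEFAULT_SIZE. The constant is static, so the old spelling compiled but read as if the capacity belonged to the particular batch, and it appears to be the only reason commonSetup() took a batch parameter. A self-contained illustration of the idiom being removed, using a stand-in class rather than Hive's:

class Batch {
  static final int DEFAULT_SIZE = 1024;
}

class StaticAccessDemo {
  public static void main(String[] args) {
    Batch b = null;
    // Legal Java: a static field reached through an instance expression is
    // resolved from the compile-time type, so this prints 1024 with no NPE.
    System.out.println(b.DEFAULT_SIZE);
    // The clearer, equivalent form the patch switches to.
    System.out.println(Batch.DEFAULT_SIZE);
  }
}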
VectorMapJoinHashMap baseHashMap = (VectorMapJoinHashMap) vectorMapJoinHashTable; - hashMapResults = new VectorMapJoinHashMapResult[batch.DEFAULT_SIZE]; + hashMapResults = new VectorMapJoinHashMapResult[VectorizedRowBatch.DEFAULT_SIZE]; for (int i = 0; i < hashMapResults.length; i++) { hashMapResults[i] = baseHashMap.createHashMapResult(); } - inputSelected = new int[batch.DEFAULT_SIZE]; + inputSelected = new int[VectorizedRowBatch.DEFAULT_SIZE]; + + allMatchs = new int[VectorizedRowBatch.DEFAULT_SIZE]; - allMatchs = new int[batch.DEFAULT_SIZE]; + equalKeySeriesHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesAllMatchIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesIsSingleValue = new boolean[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesDuplicateCounts = new int[VectorizedRowBatch.DEFAULT_SIZE]; - equalKeySeriesHashMapResultIndices = new int[batch.DEFAULT_SIZE]; - equalKeySeriesAllMatchIndices = new int[batch.DEFAULT_SIZE]; - equalKeySeriesIsSingleValue = new boolean[batch.DEFAULT_SIZE]; - equalKeySeriesDuplicateCounts = new int[batch.DEFAULT_SIZE]; + spills = new int[VectorizedRowBatch.DEFAULT_SIZE]; + spillHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; - spills = new int[batch.DEFAULT_SIZE]; - spillHashMapResultIndices = new int[batch.DEFAULT_SIZE]; + nonSpills = new int[VectorizedRowBatch.DEFAULT_SIZE]; + noMatchs = new int[VectorizedRowBatch.DEFAULT_SIZE]; + merged = new int[VectorizedRowBatch.DEFAULT_SIZE]; - nonSpills = new int[batch.DEFAULT_SIZE]; - noMatchs = new int[batch.DEFAULT_SIZE]; - merged = new int[batch.DEFAULT_SIZE]; + matchTracker = null; + isFullOuterForwardKeysToIntersect = false; + fullOuterIntersectReadPos = null; + fullOuterForwardKeys = null; } @@ -174,15 +191,16 @@ protected void outerPerBatchSetup(VectorizedRowBatch batch) { // For join operators that can generate small table results, reset their // (target) scratch columns. - for (int column : smallTableOutputVectorColumns) { + for (int column : outerSmallTableKeyColumnMap) { + ColumnVector bigTableOuterKeyColumn = batch.cols[column]; + bigTableOuterKeyColumn.reset(); + } + + for (int column : smallTableValueColumnMap) { ColumnVector smallTableColumn = batch.cols[column]; smallTableColumn.reset(); } - for (int column : bigTableOuterKeyOutputVectorColumns) { - ColumnVector bigTableOuterKeyColumn = batch.cols[column]; - bigTableOuterKeyColumn.reset(); - } } /** @@ -569,27 +587,28 @@ public void finishOuter(VectorizedRowBatch batch, protected void generateOuterNulls(VectorizedRowBatch batch, int[] noMatchs, int noMatchSize) throws IOException, HiveException { - // Set null information in the small table results area. + // Set null information in the small table results area. - for (int i = 0; i < noMatchSize; i++) { - int batchIndex = noMatchs[i]; + for (int i = 0; i < noMatchSize; i++) { + int batchIndex = noMatchs[i]; - // Mark any scratch small table scratch columns that would normally receive a copy of the - // key as null, too. - for (int column : bigTableOuterKeyOutputVectorColumns) { - ColumnVector colVector = batch.cols[column]; - colVector.noNulls = false; - colVector.isNull[batchIndex] = true; - } + // Mark any scratch small table scratch columns that would normally receive a copy of the + // key as null, too. + // + for (int column : outerSmallTableKeyColumnMap) { + ColumnVector colVector = batch.cols[column]; + colVector.noNulls = false; + colVector.isNull[batchIndex] = true; + } - // Small table values are set to null. 
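The FULL OUTER state initialized in this commonSetup() centers on matchTracker: during the probe phase every successful lookup records which hash-table entry it hit, so that after the last Big Table batch the operator can walk the table and emit only the never-matched Small Table rows. A toy tracker showing the three operations the patch calls (trackMatch, getIsFirstMatch, wasMatched); the real MatchTracker in org.apache.hadoop.hive.ql.exec.persistence differs in detail:

import java.util.BitSet;

class SketchMatchTracker {
  private final BitSet matched;
  private boolean lastWasFirstMatch;

  SketchMatchTracker(int slotCount) {
    matched = new BitSet(slotCount);
  }

  void trackMatch(int slot) {
    lastWasFirstMatch = !matched.get(slot);  // first time this entry is hit?
    matched.set(slot);
  }

  boolean getIsFirstMatch() {
    return lastWasFirstMatch;  // consulted immediately after trackMatch()
  }

  boolean wasMatched(int slot) {
    return matched.get(slot);  // consulted by the non-matched scan at the end
  }
}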
- for (int column : smallTableOutputVectorColumns) { - ColumnVector colVector = batch.cols[column]; - colVector.noNulls = false; - colVector.isNull[batchIndex] = true; - } - } - } + // Small table values are set to null. + for (int column : smallTableValueColumnMap) { + ColumnVector colVector = batch.cols[column]; + colVector.noNulls = false; + colVector.isNull[batchIndex] = true; + } + } + } /** * Generate the outer join output results for one vectorized row batch with a repeated key. @@ -734,20 +753,365 @@ public void finishOuterRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult jo */ protected void generateOuterNullsRepeatedAll(VectorizedRowBatch batch) throws HiveException { - for (int column : smallTableOutputVectorColumns) { + // Mark any scratch small table scratch columns that would normally receive a copy of the + // key as null, too. + // + for (int column : outerSmallTableKeyColumnMap) { ColumnVector colVector = batch.cols[column]; colVector.noNulls = false; colVector.isNull[0] = true; colVector.isRepeating = true; } - // Mark any scratch small table scratch columns that would normally receive a copy of the key - // as null, too. - for (int column : bigTableOuterKeyOutputVectorColumns) { + for (int column : smallTableValueColumnMap) { ColumnVector colVector = batch.cols[column]; colVector.noNulls = false; colVector.isNull[0] = true; colVector.isRepeating = true; } } + + private void markBigTableColumnsAsNullRepeating() { + + /* + * For non-match FULL OUTER Small Table results, the Big Table columns are all NULL. + */ + for (int column : bigTableRetainColumnMap) { + ColumnVector colVector = overflowBatch.cols[column]; + colVector.isRepeating = true; + colVector.noNulls = false; + colVector.isNull[0] = true; + } + } + + /* + * For FULL OUTER MapJoin, find the non-matched Small Table keys and values and add them to the + * join output result. + * + * When a Shared-Memory hash table is planned (i.e. not a Dynamic Partition Hash Join), we do + * the generation in the INTERSECT Reducer. + */ + @Override + protected void generateFullOuterSmallTableNoMatches(byte smallTablePos, + MapJoinTableContainer substituteSmallTable) throws HiveException { + + if (!conf.isDynamicPartitionHashJoin() && !conf.isFullOuterIntersect()) { + + // The FULL OUTER MapJoin INTERSECT operator does the non-match Small Table + // result work. + return; + } + + /* + * For dynamic partition hash join, both the Big Table and Small Table are partitioned (sent) + * to the Reducer using the key hash code. So, we can generate the non-match Small Table + * results locally. + * + * Or, for Intersect, we have been tracking the matched keys received from all the FULL OUTER + * MapJoin operators. So, we can generate the non-match Small Table results in this + * centralized operator. + * + * Scan the Small Table for keys that didn't match and generate the non-matches into the + * overflowBatch. + */ + + /* + * If there were no matched keys sent, we need to do our common initialization. + */ + if (needCommonSetup) { + + // Our one time process method initialization. + commonSetup(); + + needCommonSetup = false; + } + + if (needHashTableSetup) { + + // Setup our hash table specialization. It will be the first time the process + // method is called, or after a Hybrid Grace reload. + + hashTableSetup(); + + needHashTableSetup = false; + } + + /* + * To support fancy NULL repeating columns, let's flush the overflowBatch if it has anything. 
+ */ + if (overflowBatch.size > 0) { + forwardOverflow(); + } + markBigTableColumnsAsNullRepeating(); + + switch (hashTableKeyType) { + case BOOLEAN: + case BYTE: + case SHORT: + case INT: + case LONG: + generateFullOuterLongKeySmallTableNoMatches(); + break; + case STRING: + generateFullOuterStringKeySmallTableNoMatches(); + break; + case MULTI_KEY: + generateFullOuterMultiKeySmallTableNoMatches(); + break; + default: + throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType); + } + } + + /* + * For FULL OUTER MapJoin, find the non-matched Small Table Long keys and values and add them to + * the join output result. + */ + protected void generateFullOuterLongKeySmallTableNoMatches() + throws HiveException { + + final LongColumnVector singleSmallTableKeyOutputColumnVector; + if (allSmallTableKeyColumnIncluded[0]) { + singleSmallTableKeyOutputColumnVector = + (LongColumnVector) overflowBatch.cols[allSmallTableKeyColumnNums[0]]; + } else { + singleSmallTableKeyOutputColumnVector = null; + } + + VectorMapJoinLongHashMap hashMap = (VectorMapJoinLongHashMap) vectorMapJoinHashTable; + + VectorMapJoinNonMatchedIterator nonMatchedIterator = + hashMap.createNonMatchedIterator(matchTracker); + nonMatchedIterator.init(); + while (nonMatchedIterator.findNextNonMatched()) { + + final long longKey; + boolean isKeyNull = !nonMatchedIterator.readNonMatchedLongKey(); + if (!isKeyNull) { + longKey = nonMatchedIterator.getNonMatchedLongKey(); + } else { + longKey = 0; + } + + VectorMapJoinHashMapResult hashMapResult = nonMatchedIterator.getNonMatchedHashMapResult(); + + ByteSegmentRef byteSegmentRef = hashMapResult.first(); + while (byteSegmentRef != null) { + + // NOTE: Big Table result columns were marked repeating NULL already. + + if (singleSmallTableKeyOutputColumnVector != null) { + if (isKeyNull) { + singleSmallTableKeyOutputColumnVector.isNull[overflowBatch.size] = true; + singleSmallTableKeyOutputColumnVector.noNulls = false; + } else { + singleSmallTableKeyOutputColumnVector.vector[overflowBatch.size] = longKey; + singleSmallTableKeyOutputColumnVector.isNull[overflowBatch.size] = false; + } + } + + if (smallTableValueVectorDeserializeRow != null) { + + doSmallTableValueDeserializeRow(overflowBatch, overflowBatch.size, + byteSegmentRef, hashMapResult); + } + + overflowBatch.size++; + if (overflowBatch.size == overflowBatch.DEFAULT_SIZE) { + forwardOverflow(); + markBigTableColumnsAsNullRepeating(); + } + byteSegmentRef = hashMapResult.next(); + } + } + } + + private void doSmallTableKeyDeserializeRow(VectorizedRowBatch batch, int batchIndex, + byte[] keyBytes, int keyOffset, int keyLength) + throws HiveException { + + smallTableKeyOuterVectorDeserializeRow.setBytes(keyBytes, keyOffset, keyLength); + + try { + // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc. + smallTableKeyOuterVectorDeserializeRow.deserializeByRef(batch, batchIndex); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + smallTableKeyOuterVectorDeserializeRow.getDetailedReadPositionString(), + e); + } + } + + /* + * For FULL OUTER MapJoin, find the non-matched Small Table Multi-Keys and values and add them to + * the join output result. 
+ */ + protected void generateFullOuterMultiKeySmallTableNoMatches() throws HiveException { + + VectorMapJoinBytesHashMap hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + + VectorMapJoinNonMatchedIterator nonMatchedIterator = + hashMap.createNonMatchedIterator(matchTracker); + nonMatchedIterator.init(); + while (nonMatchedIterator.findNextNonMatched()) { + + nonMatchedIterator.readNonMatchedBytesKey(); + byte[] keyBytes = nonMatchedIterator.getNonMatchedBytes(); + final int keyOffset = nonMatchedIterator.getNonMatchedBytesOffset(); + final int keyLength = nonMatchedIterator.getNonMatchedBytesLength(); + + VectorMapJoinHashMapResult hashMapResult = nonMatchedIterator.getNonMatchedHashMapResult(); + + ByteSegmentRef byteSegmentRef = hashMapResult.first(); + while (byteSegmentRef != null) { + + // NOTE: Big Table result columns were marked repeating NULL already. + + if (smallTableKeyOuterVectorDeserializeRow != null) { + doSmallTableKeyDeserializeRow(overflowBatch, overflowBatch.size, + keyBytes, keyOffset, keyLength); + } + + if (smallTableValueVectorDeserializeRow != null) { + + doSmallTableValueDeserializeRow(overflowBatch, overflowBatch.size, + byteSegmentRef, hashMapResult); + } + + overflowBatch.size++; + if (overflowBatch.size == overflowBatch.DEFAULT_SIZE) { + forwardOverflow(); + markBigTableColumnsAsNullRepeating(); + } + byteSegmentRef = hashMapResult.next(); + } + } + + // NOTE: We don't have to deal with FULL OUTER All-NULL key values like we do for single-column + // LONG and STRING because we do store them in the hash map... + } + + /* + * For FULL OUTER MapJoin, find the non-matched Small Table String keys and values and add them to + * the join output result. + */ + protected void generateFullOuterStringKeySmallTableNoMatches() throws HiveException { + + final BytesColumnVector singleSmallTableKeyOutputColumnVector; + if (allSmallTableKeyColumnIncluded[0]) { + singleSmallTableKeyOutputColumnVector = + (BytesColumnVector) overflowBatch.cols[allSmallTableKeyColumnNums[0]]; + } else { + singleSmallTableKeyOutputColumnVector = null; + } + + VectorMapJoinBytesHashMap hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + + VectorMapJoinNonMatchedIterator nonMatchedIterator = + hashMap.createNonMatchedIterator(matchTracker); + nonMatchedIterator.init(); + while (nonMatchedIterator.findNextNonMatched()) { + + final byte[] keyBytes; + final int keyOffset; + final int keyLength; + boolean isKeyNull = !nonMatchedIterator.readNonMatchedBytesKey(); + if (!isKeyNull) { + keyBytes = nonMatchedIterator.getNonMatchedBytes(); + keyOffset = nonMatchedIterator.getNonMatchedBytesOffset(); + keyLength = nonMatchedIterator.getNonMatchedBytesLength(); + } else { + keyBytes = null; + keyOffset = 0; + keyLength = 0; + } + + VectorMapJoinHashMapResult hashMapResult = nonMatchedIterator.getNonMatchedHashMapResult(); + + ByteSegmentRef byteSegmentRef = hashMapResult.first(); + while (byteSegmentRef != null) { + + // NOTE: Big Table result columns were marked repeating NULL already. 
+ + if (singleSmallTableKeyOutputColumnVector != null) { + if (isKeyNull) { + singleSmallTableKeyOutputColumnVector.isNull[overflowBatch.size] = true; + singleSmallTableKeyOutputColumnVector.noNulls = false; + } else { + singleSmallTableKeyOutputColumnVector.setVal( + overflowBatch.size, + keyBytes, keyOffset, keyLength); + singleSmallTableKeyOutputColumnVector.isNull[overflowBatch.size] = false; + } + } + + if (smallTableValueVectorDeserializeRow != null) { + + doSmallTableValueDeserializeRow(overflowBatch, overflowBatch.size, + byteSegmentRef, hashMapResult); + } + + overflowBatch.size++; + if (overflowBatch.size == overflowBatch.DEFAULT_SIZE) { + forwardOverflow(); + markBigTableColumnsAsNullRepeating(); + } + byteSegmentRef = hashMapResult.next(); + } + } + } + + protected void fullOuterHashTableSetup() { + + // Always track key matches for FULL OUTER. + matchTracker = vectorMapJoinHashTable.createMatchTracker(); + + if (!conf.isDynamicPartitionHashJoin()) { + + // When the Small Table is shared among all Reducers, FULL OUTER MapJoin we must forward + // matched keys to Intersect. + isFullOuterForwardKeysToIntersect = true; + fullOuterForwardKeys = new int[VectorizedRowBatch.DEFAULT_SIZE]; + } + } + + protected void fullOuterIntersectHashTableSetup() { + + matchTracker = vectorMapJoinHashTable.createMatchTracker(); + + fullOuterIntersectReadPos = new WriteBuffers.Position(); + } + + protected void forwardFullOuterKeysToInterset(VectorizedRowBatch batch, + int fullOuterForwardKeyCount) throws HiveException { + + // Save original projection. + int[] originalProjections = batch.projectedColumns; + int originalProjectionSize = batch.projectionSize; + + // Save selected. + int[] originalSelected = batch.selected; + boolean originalSelectedInUse = batch.selectedInUse; + int originalSize = batch.size; + + // Project with the output of our operator. + batch.projectionSize = outputProjection.length; + batch.projectedColumns = outputProjection; + + // Forward just the rows whose key had a first-time match. + batch.selected = fullOuterForwardKeys; + batch.selectedInUse = true; + batch.size = fullOuterForwardKeyCount; + + vectorForwardAuxiliary(batch); + + // Revert the projected columns back, because batch can be re-used by our parent operators. + batch.projectionSize = originalProjectionSize; + batch.projectedColumns = originalProjections; + + batch.selected = originalSelected; + batch.selectedInUse = originalSelectedInUse; + batch.size = originalSize; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java index be05cc2..883c6e0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java @@ -32,7 +32,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column Long hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap; @@ -65,7 +64,7 @@ protected String getLoggingPrefix() { //--------------------------------------------------------------------------- // The hash map for this specialized class. 
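forwardFullOuterKeysToInterset() above temporarily rewires the shared row batch (projection and selection) so that only the first-time-match key rows are forwarded to the Intersect reducer, then restores every mutated field because parent operators may reuse the same batch object. The save/mutate/forward/restore pattern in isolation, with stand-in types:

import java.util.function.Consumer;

class SketchBatch {
  int[] projectedColumns;
  int projectionSize;
  int[] selected;
  boolean selectedInUse;
  int size;
}

class SideForwardSketch {
  static void forwardFirstMatchKeys(SketchBatch batch, int[] firstMatchRows, int count,
      int[] outputProjection, Consumer<SketchBatch> forward) {
    // Save everything about to be mutated.
    int[] savedProjection = batch.projectedColumns;
    int savedProjectionSize = batch.projectionSize;
    int[] savedSelected = batch.selected;
    boolean savedSelectedInUse = batch.selectedInUse;
    int savedSize = batch.size;

    // Project the operator's output and select only the first-match rows.
    batch.projectedColumns = outputProjection;
    batch.projectionSize = outputProjection.length;
    batch.selected = firstMatchRows;
    batch.selectedInUse = true;
    batch.size = count;

    forward.accept(batch);  // ~ vectorForwardAuxiliary(batch)

    // Restore: the batch object is shared with upstream operators.
    batch.projectedColumns = savedProjection;
    batch.projectionSize = savedProjectionSize;
    batch.selected = savedSelected;
    batch.selectedInUse = savedSelectedInUse;
    batch.size = savedSize;
  }
}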
- private transient VectorMapJoinLongHashMap hashMap; + protected transient VectorMapJoinLongHashMap hashMap; //--------------------------------------------------------------------------- // Single-Column Long specific members. @@ -77,7 +76,7 @@ protected String getLoggingPrefix() { private transient long max; // The column number for this one column join specialization. - private transient int singleJoinColumn; + protected transient int singleJoinColumn; //--------------------------------------------------------------------------- // Pass-thru constructors. @@ -102,55 +101,41 @@ public VectorMapJoinOuterLongOperator(CompilationOpContext ctx, OperatorDesc con // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; + protected void commonSetup() throws HiveException { + super.commonSetup(); - alias = (byte) tag; + /* + * Initialize Single-Column Long members for this specialized class. + */ - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); - - /* - * Initialize Single-Column Long members for this specialized class. - */ - - singleJoinColumn = bigTableKeyColumnMap[0]; - - needCommonSetup = false; - } - - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + singleJoinColumn = bigTableKeyColumnMap[0]; + } - /* - * Get our Single-Column Long hash map information for this specialized class. - */ + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + /* + * Get our Single-Column Long hash map information for this specialized class. + */ + + hashMap = (VectorMapJoinLongHashMap) vectorMapJoinHashTable; + useMinMax = hashMap.useMinMax(); + if (useMinMax) { + min = hashMap.min(); + max = hashMap.max(); + } - hashMap = (VectorMapJoinLongHashMap) vectorMapJoinHashTable; - useMinMax = hashMap.useMinMax(); - if (useMinMax) { - min = hashMap.min(); - max = hashMap.max(); - } + isFullOuterForwardKeysToIntersect = false; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } - return; - } - // Do the per-batch setup for an outer join. outerPerBatchSetup(batch); @@ -160,9 +145,6 @@ public void process(Object row, int tag) throws HiveException { // later. 
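hashTableSetup() caches the long hash map's min/max key range when the table provides one, so an out-of-range key is a NOMATCH with no probe at all, and in the repeating-key case one comparison can reject an entire batch. Reduced to a stand-alone check (the bounds here are invented):

class MinMaxPruneSketch {
  // Captured once from the loaded hash map, as hashTableSetup() does above.
  static final long MIN = -100, MAX = 100;

  static boolean mightMatch(long key) {
    return key >= MIN && key <= MAX;  // false means NOMATCH without a hash probe
  }

  public static void main(String[] args) {
    System.out.println(mightMatch(42));    // true: still needs the real lookup
    System.out.println(mightMatch(5000));  // false: the probe is skipped
  }
}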
boolean inputSelectedInUse = batch.selectedInUse; if (inputSelectedInUse) { - // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) { - // throw new HiveException("batch.selected is not in sort order and unique"); - // } System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize); } @@ -174,19 +156,6 @@ public void process(Object row, int tag) throws HiveException { ve.evaluate(batch); } someRowsFilteredOut = (batch.size != inputLogicalSize); - if (LOG.isDebugEnabled()) { - if (batch.selectedInUse) { - if (inputSelectedInUse) { - LOG.debug(CLASS_NAME + - " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) + - " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); - } else { - LOG.debug(CLASS_NAME + - " inputLogicalSize " + inputLogicalSize + - " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); - } - } - } } // Perform any key expressions. Results will go into scratch columns. @@ -205,6 +174,11 @@ public void process(Object row, int tag) throws HiveException { long[] vector = joinColVector.vector; /* + * For FULL OUTER only. + */ + int fullOuterForwardKeyCount = 0; + + /* * Single-Column Long check for repeating. */ @@ -234,12 +208,16 @@ public void process(Object row, int tag) throws HiveException { } else { // Handle *repeated* join key, if found. long key = vector[0]; - // LOG.debug(CLASS_NAME + " repeated key " + key); if (useMinMax && (key < min || key > max)) { // Out of range for whole batch. joinResult = JoinUtil.JoinResult.NOMATCH; } else { - joinResult = hashMap.lookup(key, hashMapResults[0]); + joinResult = hashMap.lookup(key, hashMapResults[0], matchTracker); + if (isFullOuterForwardKeysToIntersect && + joinResult == JoinUtil.JoinResult.MATCH && + matchTracker.getIsFirstMatch()) { + fullOuterForwardKeys[fullOuterForwardKeyCount++] = 0; // Index of repeated keys. + } } } @@ -247,9 +225,6 @@ public void process(Object row, int tag) throws HiveException { * Common repeated join result processing. */ - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name()); - } finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut, inputSelectedInUse, inputLogicalSize); } else { @@ -258,10 +233,6 @@ public void process(Object row, int tag) throws HiveException { * NOT Repeating. */ - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated"); - } - int selected[] = batch.selected; boolean selectedInUse = batch.selectedInUse; @@ -286,8 +257,6 @@ public void process(Object row, int tag) throws HiveException { for (int logical = 0; logical < batch.size; logical++) { int batchIndex = (selectedInUse ? selected[logical] : logical); - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch"); - /* * Single-Column Long outer null detection. */ @@ -305,7 +274,6 @@ public void process(Object row, int tag) throws HiveException { atLeastOneNonMatch = true; - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL"); } else { /* @@ -354,23 +322,25 @@ public void process(Object row, int tag) throws HiveException { // Key out of range for whole hash table. 
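In the repeated-key path above, the lookup now passes matchTracker, and when isFullOuterForwardKeysToIntersect is set a row index is captured only on a key's first-ever match, so each distinct key crosses to the Intersect reducer once per hash table. A runnable miniature of that gate (the slot numbers are invented):

import java.util.Arrays;
import java.util.BitSet;

class FirstMatchGateSketch {
  public static void main(String[] args) {
    BitSet seen = new BitSet();
    int[] matchedSlots = {7, 7, 9, 7};  // hash-table slot hit by each matched row
    int[] forward = new int[matchedSlots.length];
    int count = 0;
    for (int row = 0; row < matchedSlots.length; row++) {
      boolean first = !seen.get(matchedSlots[row]);
      seen.set(matchedSlots[row]);
      if (first) {
        forward[count++] = row;  // ~ fullOuterForwardKeys[fullOuterForwardKeyCount++]
      }
    }
    // Prints [0, 2]: only the first probe of each distinct key is forwarded.
    System.out.println(Arrays.toString(Arrays.copyOf(forward, count)));
  }
}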
saveJoinResult = JoinUtil.JoinResult.NOMATCH; } else { - saveJoinResult = hashMap.lookup(currentKey, hashMapResults[hashMapResultCount]); + saveJoinResult = hashMap.lookup(currentKey, hashMapResults[hashMapResultCount], + matchTracker); } - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " New Key " + currentKey + " " + saveJoinResult.name()); - /* * Common outer join result processing. */ switch (saveJoinResult) { case MATCH: + if (isFullOuterForwardKeysToIntersect && + matchTracker.getIsFirstMatch()) { + fullOuterForwardKeys[fullOuterForwardKeyCount++] = batchIndex; + } equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount; equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount; equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow(); equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1; allMatchs[allMatchCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey); break; case SPILL: @@ -381,11 +351,9 @@ public void process(Object row, int tag) throws HiveException { case NOMATCH: atLeastOneNonMatch = true; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey); break; } } else { - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name()); // Series of equal keys. @@ -393,7 +361,6 @@ public void process(Object row, int tag) throws HiveException { case MATCH: equalKeySeriesDuplicateCounts[equalKeySeriesCount]++; allMatchs[allMatchCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate"); break; case SPILL: @@ -403,13 +370,9 @@ public void process(Object row, int tag) throws HiveException { break; case NOMATCH: - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate"); break; } } - // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) { - // throw new HiveException("allMatchs is not in sort order and unique"); - // } } } @@ -451,9 +414,19 @@ public void process(Object row, int tag) throws HiveException { } if (batch.size > 0) { - // Forward any remaining selected rows. + + // Forward any rows in the Big Table batch that had results added (they will be selected). + // NOTE: Other result rows may have been generated in the overflowBatch. forwardBigTableBatch(batch); } + if (isFullOuterForwardKeysToIntersect && + fullOuterForwardKeyCount > 0) { + + // Forward any keys in the Big Table batch that were a first-time match. + // NOTE: We cannot have a LIMIT running below FULL OUTER that cause setDone because it will + // disrupt Intersect processing... 
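The non-repeated path amortizes probes over runs of consecutive equal keys: the equalKeySeries* arrays record one hash-map result per run plus a duplicate count, rather than probing once per row. A stand-alone rendering of that bookkeeping:

class EqualKeySeriesSketch {
  public static void main(String[] args) {
    long[] keys = {5, 5, 5, 8, 9, 9};  // consecutive duplicates within one batch
    int seriesCount = 0;
    int[] duplicateCounts = new int[keys.length];
    for (int i = 0; i < keys.length; i++) {
      if (i == 0 || keys[i] != keys[i - 1]) {
        seriesCount++;  // a new series is where the single hash probe happens
      }
      duplicateCounts[seriesCount - 1]++;  // ~ equalKeySeriesDuplicateCounts
    }
    System.out.println(seriesCount + " probes for " + keys.length + " rows");
  }
}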
+ forwardFullOuterKeysToInterset(batch, fullOuterForwardKeyCount); + } } catch (IOException e) { throw new HiveException(e); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java index 70f88e3..3050333 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Multi-Key hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; @@ -69,17 +68,17 @@ protected String getLoggingPrefix() { //--------------------------------------------------------------------------- // The hash map for this specialized class. - private transient VectorMapJoinBytesHashMap hashMap; + protected transient VectorMapJoinBytesHashMap hashMap; //--------------------------------------------------------------------------- // Multi-Key specific members. // // Object that can take a set of columns in row in a vectorized row batch and serialized it. - private transient VectorSerializeRow keyVectorSerializeWrite; + protected transient VectorSerializeRow keyVectorSerializeWrite; // The BinarySortable serialization of the current key. - private transient Output currentKeyOutput; + protected transient Output currentKeyOutput; // The BinarySortable serialization of the saved key for a possible series of equal keys. private transient Output saveKeyOutput; @@ -107,55 +106,41 @@ public VectorMapJoinOuterMultiKeyOperator(CompilationOpContext ctx, OperatorDesc // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); - /* - * Initialize Multi-Key members for this specialized class. - */ + /* + * Initialize Multi-Key members for this specialized class. + */ - keyVectorSerializeWrite = new VectorSerializeRow( - new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); + keyVectorSerializeWrite = new VectorSerializeRow( + new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); - currentKeyOutput = new Output(); - saveKeyOutput = new Output(); + currentKeyOutput = new Output(); + saveKeyOutput = new Output(); + } - needCommonSetup = false; - } + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + /* + * Get our Multi-Key hash map information for this specialized class. + */ - /* - * Get our Multi-Key hash map information for this specialized class. 
- */ + hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; - hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + isFullOuterForwardKeysToIntersect = false; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } - return; - } - // Do the per-batch setup for an outer join. outerPerBatchSetup(batch); @@ -165,9 +150,6 @@ public void process(Object row, int tag) throws HiveException { // later. boolean inputSelectedInUse = batch.selectedInUse; if (inputSelectedInUse) { - // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) { - // throw new HiveException("batch.selected is not in sort order and unique"); - // } System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize); } @@ -179,19 +161,6 @@ public void process(Object row, int tag) throws HiveException { ve.evaluate(batch); } someRowsFilteredOut = (batch.size != inputLogicalSize); - if (LOG.isDebugEnabled()) { - if (batch.selectedInUse) { - if (inputSelectedInUse) { - LOG.debug(CLASS_NAME + - " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) + - " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); - } else { - LOG.debug(CLASS_NAME + - " inputLogicalSize " + inputLogicalSize + - " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); - } - } - } } // Perform any key expressions. Results will go into scratch columns. @@ -208,6 +177,11 @@ public void process(Object row, int tag) throws HiveException { // None. /* + * For FULL OUTER only. + */ + int fullOuterForwardKeyCount = 0; + + /* * Multi-Key Long check for repeating. */ @@ -259,16 +233,18 @@ public void process(Object row, int tag) throws HiveException { keyVectorSerializeWrite.serializeWrite(batch, 0); byte[] keyBytes = currentKeyOutput.getData(); int keyLength = currentKeyOutput.getLength(); - joinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[0]); + joinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[0], matchTracker); + if (isFullOuterForwardKeysToIntersect && + joinResult == JoinUtil.JoinResult.MATCH && + matchTracker.getIsFirstMatch()) { + fullOuterForwardKeys[fullOuterForwardKeyCount++] = 0; // Index of repeated keys. + } } /* * Common repeated join result processing. */ - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name()); - } finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut, inputSelectedInUse, inputLogicalSize); } else { @@ -277,10 +253,6 @@ public void process(Object row, int tag) throws HiveException { * NOT Repeating. */ - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated"); - } - int selected[] = batch.selected; boolean selectedInUse = batch.selectedInUse; @@ -305,8 +277,6 @@ public void process(Object row, int tag) throws HiveException { for (int logical = 0; logical < batch.size; logical++) { int batchIndex = (selectedInUse ? selected[logical] : logical); - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch"); - /* * Multi-Key outer null detection. 
*/ @@ -325,7 +295,6 @@ public void process(Object row, int tag) throws HiveException { atLeastOneNonMatch = true; - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL"); } else { /* @@ -375,7 +344,9 @@ public void process(Object row, int tag) throws HiveException { byte[] keyBytes = saveKeyOutput.getData(); int keyLength = saveKeyOutput.getLength(); - saveJoinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[hashMapResultCount]); + saveJoinResult = hashMap.lookup(keyBytes, 0, keyLength, + hashMapResults[hashMapResultCount], matchTracker); + /* * Common outer join result processing. @@ -383,12 +354,15 @@ public void process(Object row, int tag) throws HiveException { switch (saveJoinResult) { case MATCH: + if (isFullOuterForwardKeysToIntersect && + matchTracker.getIsFirstMatch()) { + fullOuterForwardKeys[fullOuterForwardKeyCount++] = batchIndex; + } equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount; equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount; equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow(); equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1; allMatchs[allMatchCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey); break; case SPILL: @@ -399,11 +373,9 @@ public void process(Object row, int tag) throws HiveException { case NOMATCH: atLeastOneNonMatch = true; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey); break; } } else { - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name()); // Series of equal keys. @@ -411,7 +383,6 @@ public void process(Object row, int tag) throws HiveException { case MATCH: equalKeySeriesDuplicateCounts[equalKeySeriesCount]++; allMatchs[allMatchCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate"); break; case SPILL: @@ -421,13 +392,9 @@ public void process(Object row, int tag) throws HiveException { break; case NOMATCH: - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate"); break; } } - // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) { - // throw new HiveException("allMatchs is not in sort order and unique"); - // } } } @@ -469,9 +436,19 @@ public void process(Object row, int tag) throws HiveException { } if (batch.size > 0) { - // Forward any remaining selected rows. + + // Forward any rows in the Big Table batch that had results added (they will be selected). + // NOTE: Other result rows may have been generated in the overflowBatch. forwardBigTableBatch(batch); } + if (isFullOuterForwardKeysToIntersect && + fullOuterForwardKeyCount > 0) { + + // Forward any keys in the Big Table batch that were a first-time match. + // NOTE: We cannot have a LIMIT running below FULL OUTER that cause setDone because it will + // disrupt Intersect processing... 
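The multi-key operator applies the same series detection, but a row's key columns are first serialized with BinarySortableSerializeWrite into currentKeyOutput and byte-compared against saveKeyOutput to decide whether the equal-key series continues. The two-buffer idiom with plain byte arrays:

import java.util.Arrays;

class KeySeriesBufferSketch {
  public static void main(String[] args) {
    // Each element stands for one row's key columns serialized to bytes.
    byte[][] serializedKeys = {{1, 5}, {1, 5}, {2, 0}};
    byte[] saved = null;  // ~ saveKeyOutput
    for (byte[] current : serializedKeys) {  // ~ currentKeyOutput, row by row
      if (saved == null || !Arrays.equals(saved, current)) {
        System.out.println("new key series " + Arrays.toString(current));
        saved = current;  // ~ swapping the current and save outputs
      } else {
        System.out.println("key continues");
      }
    }
  }
}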
+ forwardFullOuterKeysToInterset(batch, fullOuterForwardKeyCount); + } } catch (IOException e) { throw new HiveException(e); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java index 714f5ec..5c9137f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java @@ -30,7 +30,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column String hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; @@ -65,14 +64,14 @@ protected String getLoggingPrefix() { //--------------------------------------------------------------------------- // The hash map for this specialized class. - private transient VectorMapJoinBytesHashMap hashMap; + protected transient VectorMapJoinBytesHashMap hashMap; //--------------------------------------------------------------------------- // Single-Column String specific members. // // The column number for this one column join specialization. - private transient int singleJoinColumn; + protected transient int singleJoinColumn; //--------------------------------------------------------------------------- // Pass-thru constructors. @@ -97,50 +96,36 @@ public VectorMapJoinOuterStringOperator(CompilationOpContext ctx, OperatorDesc c // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; + protected void commonSetup() throws HiveException { + super.commonSetup(); - alias = (byte) tag; + /* + * Initialize Single-Column String members for this specialized class. + */ - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); - - /* - * Initialize Single-Column String members for this specialized class. - */ + singleJoinColumn = bigTableKeyColumnMap[0]; + } - singleJoinColumn = bigTableKeyColumnMap[0]; + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - needCommonSetup = false; - } + /* + * Get our Single-Column String hash map information for this specialized class. + */ - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; - /* - * Get our Single-Column String hash map information for this specialized class. - */ - - hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + isFullOuterForwardKeysToIntersect = false; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } - return; - } - // Do the per-batch setup for an outer join. outerPerBatchSetup(batch); @@ -150,33 +135,17 @@ public void process(Object row, int tag) throws HiveException { // later. 
boolean inputSelectedInUse = batch.selectedInUse; if (inputSelectedInUse) { - // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) { - // throw new HiveException("batch.selected is not in sort order and unique"); - // } System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize); } // Filtering for outer join just removes rows available for hash table matching. - boolean someRowsFilteredOut = false; + boolean someRowsFilteredOut = false; if (bigTableFilterExpressions.length > 0) { // Since the input for (VectorExpression ve : bigTableFilterExpressions) { ve.evaluate(batch); } someRowsFilteredOut = (batch.size != inputLogicalSize); - if (LOG.isDebugEnabled()) { - if (batch.selectedInUse) { - if (inputSelectedInUse) { - LOG.debug(CLASS_NAME + - " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) + - " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); - } else { - LOG.debug(CLASS_NAME + - " inputLogicalSize " + inputLogicalSize + - " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); - } - } - } } // Perform any key expressions. Results will go into scratch columns. @@ -197,6 +166,11 @@ public void process(Object row, int tag) throws HiveException { int[] length = joinColVector.length; /* + * For FULL OUTER only. + */ + int fullOuterForwardKeyCount = 0; + + /* * Single-Column String check for repeating. */ @@ -228,7 +202,13 @@ public void process(Object row, int tag) throws HiveException { byte[] keyBytes = vector[0]; int keyStart = start[0]; int keyLength = length[0]; - joinResult = hashMap.lookup(keyBytes, keyStart, keyLength, hashMapResults[0]); + joinResult = hashMap.lookup( + keyBytes, keyStart, keyLength, hashMapResults[0], matchTracker); + if (isFullOuterForwardKeysToIntersect && + joinResult == JoinUtil.JoinResult.MATCH && + matchTracker.getIsFirstMatch()) { + fullOuterForwardKeys[fullOuterForwardKeyCount++] = 0; // Index of repeated keys. + } } /* @@ -246,10 +226,6 @@ public void process(Object row, int tag) throws HiveException { * NOT Repeating. */ - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated"); - } - int selected[] = batch.selected; boolean selectedInUse = batch.selectedInUse; @@ -274,8 +250,6 @@ public void process(Object row, int tag) throws HiveException { for (int logical = 0; logical < batch.size; logical++) { int batchIndex = (selectedInUse ? selected[logical] : logical); - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch"); - /* * Single-Column String outer null detection. */ @@ -293,7 +267,6 @@ public void process(Object row, int tag) throws HiveException { atLeastOneNonMatch = true; - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL"); } else { /* @@ -343,7 +316,8 @@ public void process(Object row, int tag) throws HiveException { byte[] keyBytes = vector[batchIndex]; int keyStart = start[batchIndex]; int keyLength = length[batchIndex]; - saveJoinResult = hashMap.lookup(keyBytes, keyStart, keyLength, hashMapResults[hashMapResultCount]); + saveJoinResult = hashMap.lookup(keyBytes, keyStart, keyLength, + hashMapResults[hashMapResultCount], matchTracker); /* * Common outer join result processing. 
@@ -351,12 +325,15 @@ public void process(Object row, int tag) throws HiveException { switch (saveJoinResult) { case MATCH: + if (isFullOuterForwardKeysToIntersect && + matchTracker.getIsFirstMatch()) { + fullOuterForwardKeys[fullOuterForwardKeyCount++] = batchIndex; + } equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount; equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount; equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow(); equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1; allMatchs[allMatchCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey); break; case SPILL: @@ -367,11 +344,9 @@ public void process(Object row, int tag) throws HiveException { case NOMATCH: atLeastOneNonMatch = true; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey); break; } } else { - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name()); // Series of equal keys. @@ -379,7 +354,6 @@ public void process(Object row, int tag) throws HiveException { case MATCH: equalKeySeriesDuplicateCounts[equalKeySeriesCount]++; allMatchs[allMatchCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate"); break; case SPILL: @@ -389,13 +363,9 @@ public void process(Object row, int tag) throws HiveException { break; case NOMATCH: - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate"); break; } } - // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) { - // throw new HiveException("allMatchs is not in sort order and unique"); - // } } } @@ -437,9 +407,19 @@ public void process(Object row, int tag) throws HiveException { } if (batch.size > 0) { - // Forward any remaining selected rows. + + // Forward any rows in the Big Table batch that had results added (they will be selected). + // NOTE: Other result rows may have been generated in the overflowBatch. forwardBigTableBatch(batch); } + if (isFullOuterForwardKeysToIntersect && + fullOuterForwardKeyCount > 0) { + + // Forward any keys in the Big Table batch that were a first-time match. + // NOTE: We cannot have a LIMIT running below FULL OUTER that cause setDone because it will + // disrupt Intersect processing... 
+ forwardFullOuterKeysToInterset(batch, fullOuterForwardKeyCount); + } } catch (IOException e) { throw new HiveException(e); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java index 57db136..10973ac 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java @@ -23,9 +23,13 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; import org.apache.hadoop.io.BytesWritable; import org.apache.hive.common.util.HashCodeUtil; @@ -46,12 +50,115 @@ protected BytesWritable testValueBytesWritable; + private long fullOuterNullKeyValueRef; + + private static class NonMatchedBytesHashMapIterator extends VectorMapJoinFastNonMatchedIterator { + + private VectorMapJoinFastBytesHashMap hashMap; + + private boolean noMore; + private boolean keyIsNull; + + private WriteBuffers.Position nonMatchedReadPos; + + private ByteSegmentRef nonMatchedKeyByteSegmentRef; + + private VectorMapJoinFastValueStore.HashMapResult nonMatchedHashMapResult; + + NonMatchedBytesHashMapIterator(MatchTracker matchTracker, + VectorMapJoinFastBytesHashMap hashMap) { + super(matchTracker); + this.hashMap = hashMap; + } + + @Override + public void init() { + super.init(); + noMore = false; + keyIsNull = false; + nonMatchedReadPos = new WriteBuffers.Position(); + nonMatchedKeyByteSegmentRef = new ByteSegmentRef(); + nonMatchedHashMapResult = new VectorMapJoinFastValueStore.HashMapResult(); + } + + @Override + public boolean findNextNonMatched() { + if (noMore) { + return false; + } + while (true) { + nonMatchedLogicalSlotNum++; + if (nonMatchedLogicalSlotNum >= hashMap.logicalHashBucketCount) { + + // Fall below and handle Small Table NULL key. + break; + } + final int nonMatchedTripleIndex = nonMatchedLogicalSlotNum * 3; + if (hashMap.slotTriples[nonMatchedTripleIndex] != 0) { + if (!matchTracker.wasMatched(nonMatchedLogicalSlotNum)) { + nonMatchedHashMapResult.set( + hashMap.valueStore, hashMap.slotTriples[nonMatchedTripleIndex + 2]); + keyIsNull = false; + return true; + } + } + } + + // Do we have a Small Table NULL Key? 
+ if (hashMap.fullOuterNullKeyValueRef == 0) { + return false; + } + nonMatchedHashMapResult.set( + hashMap.valueStore, hashMap.fullOuterNullKeyValueRef); + noMore = true; + keyIsNull = true; + return true; + } + + @Override + public boolean readNonMatchedBytesKey() throws HiveException { + if (keyIsNull) { + return false; + } + hashMap.keyStore.getKey( + hashMap.slotTriples[nonMatchedLogicalSlotNum * 3], + nonMatchedKeyByteSegmentRef, + nonMatchedReadPos); + return true; + } + + @Override + public byte[] getNonMatchedBytes() { + return nonMatchedKeyByteSegmentRef.getBytes(); + } + + @Override + public int getNonMatchedBytesOffset() { + return (int) nonMatchedKeyByteSegmentRef.getOffset(); + } + + @Override + public int getNonMatchedBytesLength() { + return nonMatchedKeyByteSegmentRef.getLength(); + } + + @Override + public VectorMapJoinHashMapResult getNonMatchedHashMapResult() { + return nonMatchedHashMapResult; + } + } + @Override public VectorMapJoinHashMapResult createHashMapResult() { return new VectorMapJoinFastValueStore.HashMapResult(); } @Override + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + return new NonMatchedBytesHashMapIterator(matchTracker, this); + } + + @Override public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, long hashCode, boolean isNewKey, BytesWritable currentValue) { @@ -64,31 +171,56 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, slotTriples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength); slotTriples[tripleIndex + 1] = hashCode; slotTriples[tripleIndex + 2] = valueStore.addFirst(valueBytes, 0, valueLength); - // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); } else { // Add another value. 
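The assignSlot path above grows a per-key value chain: valueStore.addFirst() creates the chain head, and each valueStore.addMore() returns a new head with the old one linked behind it, which is why the returned reference is written back into slotTriples[tripleIndex + 2]. A toy model of that chaining (illustrative; the real store packs the links into long reference words inside write buffers):

// Toy model of addFirst/addMore value chaining; illustrative only.
final class ToyValueChain {
  static final class Ref {
    final byte[] value;
    final Ref previousHead;   // the rest of the chain
    Ref(byte[] value, Ref previousHead) {
      this.value = value;
      this.previousHead = previousHead;
    }
  }

  static Ref addFirst(byte[] value, int start, int length) {
    return new Ref(java.util.Arrays.copyOfRange(value, start, start + length), null);
  }

  static Ref addMore(Ref head, byte[] value, int start, int length) {
    // New head in front; the caller stores the returned reference back.
    return new Ref(java.util.Arrays.copyOfRange(value, start, start + length), head);
  }
}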
- // LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); slotTriples[tripleIndex + 2] = valueStore.addMore(slotTriples[tripleIndex + 2], valueBytes, 0, valueLength); - // LOG.debug("VectorMapJoinFastBytesHashMap add more new valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); } } @Override - public JoinUtil.JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, VectorMapJoinHashMapResult hashMapResult) { + public JoinUtil.JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, + VectorMapJoinHashMapResult hashMapResult) { VectorMapJoinFastValueStore.HashMapResult optimizedHashMapResult = (VectorMapJoinFastValueStore.HashMapResult) hashMapResult; optimizedHashMapResult.forget(); long hashCode = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength); - long valueRefWord = findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashMapResult.getReadPos()); + int tripleIndex = + findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashMapResult.getReadPos()); JoinUtil.JoinResult joinResult; - if (valueRefWord == -1) { + if (tripleIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { - // LOG.debug("VectorMapJoinFastBytesHashMap lookup hashCode " + Long.toHexString(hashCode) + " valueRefWord " + Long.toHexString(valueRefWord) + " (valueStore != null) " + (valueStore != null)); + optimizedHashMapResult.set(valueStore, slotTriples[tripleIndex + 2]); + + joinResult = JoinUtil.JoinResult.MATCH; + } - optimizedHashMapResult.set(valueStore, valueRefWord); + optimizedHashMapResult.setJoinResult(joinResult); + + return joinResult; + } + + @Override + public JoinUtil.JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, + VectorMapJoinHashMapResult hashMapResult, MatchTracker matchTracker) { + VectorMapJoinFastValueStore.HashMapResult optimizedHashMapResult = + (VectorMapJoinFastValueStore.HashMapResult) hashMapResult; + + optimizedHashMapResult.forget(); + + long hashCode = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength); + int tripleIndex = + findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashMapResult.getReadPos()); + JoinUtil.JoinResult joinResult; + if (tripleIndex == -1) { + joinResult = JoinUtil.JoinResult.NOMATCH; + } else { + if (matchTracker != null) { + matchTracker.trackMatch(tripleIndex / 3); + } + optimizedHashMapResult.set(valueStore, slotTriples[tripleIndex + 2]); joinResult = JoinUtil.JoinResult.MATCH; } @@ -98,10 +230,44 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, return joinResult; } + /* + * This variation is used by FULL OUTER INTERSECT MapJoin. It does key match tracking for + * intersect purposes but does not return Small Table values. 
+ */ + @Override + public boolean lookupNoResult(byte[] keyBytes, int keyStart, int keyLength, + WriteBuffers.Position readPos, MatchTracker matchTracker) { + + long hashCode = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength); + int tripleIndex = + findReadSlot(keyBytes, keyStart, keyLength, hashCode, readPos); + if (tripleIndex != -1) { + matchTracker.trackMatch(tripleIndex / 3); + } + return true; + } + + public void addFullOuterNullKeyValue(BytesWritable currentValue) { + + byte[] valueBytes = currentValue.getBytes(); + int valueLength = currentValue.getLength(); + + if (fullOuterNullKeyValueRef == 0) { + fullOuterNullKeyValueRef = valueStore.addFirst(valueBytes, 0, valueLength); + } else { + + // Add another value. + fullOuterNullKeyValueRef = + valueStore.addMore(fullOuterNullKeyValueRef, valueBytes, 0, valueLength); + } + } + public VectorMapJoinFastBytesHashMap( int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); + fullOuterNullKeyValueRef = 0; + valueStore = new VectorMapJoinFastValueStore(writeBuffersSize); // Share the same write buffers with our value store. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java index 726fd29..c0295dd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java @@ -57,10 +57,8 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, slotTriples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength); slotTriples[tripleIndex + 1] = hashCode; slotTriples[tripleIndex + 2] = 1; // Count. - // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); } else { // Add another value. - // LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); slotTriples[tripleIndex + 2]++; } } @@ -75,13 +73,20 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, optimizedHashMultiSetResult.forget(); long hashCode = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength); - long count = findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashMultiSetResult.getReadPos()); + int tripleIndex = + findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashMultiSetResult.getReadPos()); JoinUtil.JoinResult joinResult; - if (count == -1) { + if (tripleIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { - - optimizedHashMultiSetResult.set(count); + /* + * NOTE: Support for trackMatched not needed yet for Set. 
+ + if (matchTracker != null) { + matchTracker.trackMatch(tripleIndex / 3); + } + */ + optimizedHashMultiSetResult.set(slotTriples[tripleIndex + 2]); joinResult = JoinUtil.JoinResult.MATCH; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java index 5d750a8..e99a029 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java @@ -65,11 +65,19 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, optimizedHashSetResult.forget(); long hashCode = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength); - long existance = findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashSetResult.getReadPos()); + int tripleIndex = + findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashSetResult.getReadPos()); JoinUtil.JoinResult joinResult; - if (existance == -1) { + if (tripleIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { + /* + * NOTE: Support for trackMatched not needed yet for Set. + + if (matchTracker != null) { + matchTracker.trackMatch(tripleIndex / 3); + } + */ joinResult = JoinUtil.JoinResult.MATCH; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java index f2b794f..dcb89b6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java @@ -70,13 +70,11 @@ public void add(byte[] keyBytes, int keyStart, int keyLength, BytesWritable curr while (true) { int tripleIndex = 3 * slot; if (slotTriples[tripleIndex] == 0) { - // LOG.debug("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " empty"); isNewKey = true;; break; } if (hashCode == slotTriples[tripleIndex + 1] && keyStore.unsafeEqualKey(slotTriples[tripleIndex], keyBytes, keyStart, keyLength)) { - // LOG.debug("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " existing"); isNewKey = false; break; } @@ -150,7 +148,6 @@ private void expandAndRehash() { } // Use old value reference word. 
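The refactor running through these files changes findReadSlot to return the triple index rather than the value reference word, so a single return value now serves both the value fetch and match tracking. Restated as a sketch (helper names are illustrative, the layout is the one the code above probes):

// Layout of slotTriples, as used by findWriteSlot/findReadSlot:
//   slotTriples[3 * slot]     key reference word (0 means the slot is empty)
//   slotTriples[3 * slot + 1] full 64-bit hash code
//   slotTriples[3 * slot + 2] value reference word (a count for multisets)
final class TripleLayoutSketch {
  static int logicalSlot(int tripleIndex) {
    return tripleIndex / 3;               // fed to MatchTracker.trackMatch()
  }
  static long valueRef(long[] slotTriples, int tripleIndex) {
    return slotTriples[tripleIndex + 2];  // fed to hashMapResult.set(valueStore, ...)
  }
}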
- // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash key " + tableKey + " slot " + newSlot + " newPairIndex " + newPairIndex + " empty slot (i = " + i + ")"); newSlotTriples[newTripleIndex] = keyRef; newSlotTriples[newTripleIndex + 1] = hashCode; @@ -165,10 +162,9 @@ private void expandAndRehash() { largestNumberOfSteps = newLargestNumberOfSteps; resizeThreshold = (int)(logicalHashBucketCount * loadFactor); metricExpands++; - // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands); } - protected final long findReadSlot( + protected final int findReadSlot( byte[] keyBytes, int keyStart, int keyLength, long hashCode, WriteBuffers.Position readPos) { int intHashCode = (int) hashCode; @@ -177,7 +173,6 @@ protected final long findReadSlot( int i = 0; while (true) { int tripleIndex = slot * 3; - // LOG.debug("VectorMapJoinFastBytesHashMap findReadSlot slot keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(hashCode) + " entry hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); if (slotTriples[tripleIndex] == 0) { // Given that we do not delete, an empty slot means no match. return -1; @@ -185,7 +180,7 @@ protected final long findReadSlot( // Finally, verify the key bytes match. if (keyStore.equalKey(slotTriples[tripleIndex], keyBytes, keyStart, keyLength, readPos)) { - return slotTriples[tripleIndex + 2]; + return tripleIndex; } } // Some other key (collision) - keep probing. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java index cbcc9b1..bcc9cb9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java @@ -22,7 +22,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTable; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; public abstract class VectorMapJoinFastHashTable implements VectorMapJoinHashTable { public static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastHashTable.class); @@ -96,4 +98,19 @@ public long getEstimatedMemorySize() { JavaDataModel jdm = JavaDataModel.get(); return JavaDataModel.alignUp(10L * jdm.primitive1() + jdm.primitive2(), jdm.memoryAlign()); } + + @Override + public MatchTracker createMatchTracker() { + return MatchTracker.create(logicalHashBucketCount); + } + + @Override + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + throw new RuntimeException("Not implemented"); + } + + @Override + public int spillPartitionId() { + throw new RuntimeException("Not implemented"); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java index b6684e0..0a3c84a 100644 --- 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java @@ -22,6 +22,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; // Optimized for sequential key lookup. @@ -124,13 +125,11 @@ public boolean unsafeEqualKey(long keyRefWord, byte[] keyBytes, int keyStart, in public boolean equalKey(long keyRefWord, byte[] keyBytes, int keyStart, int keyLength, WriteBuffers.Position readPos) { - int storedKeyLengthLength = + int storedKeyLength = (int) ((keyRefWord & SmallKeyLength.bitMask) >> SmallKeyLength.bitShift); - boolean isKeyLengthSmall = (storedKeyLengthLength != SmallKeyLength.allBitsOn); + boolean isKeyLengthSmall = (storedKeyLength != SmallKeyLength.allBitsOn); - // LOG.debug("VectorMapJoinFastKeyStore equalKey keyLength " + keyLength + " isKeyLengthSmall " + isKeyLengthSmall + " storedKeyLengthLength " + storedKeyLengthLength + " keyRefWord " + Long.toHexString(keyRefWord)); - - if (isKeyLengthSmall && storedKeyLengthLength != keyLength) { + if (isKeyLengthSmall && storedKeyLength != keyLength) { return false; } long absoluteKeyOffset = @@ -139,16 +138,14 @@ public boolean equalKey(long keyRefWord, byte[] keyBytes, int keyStart, int keyL writeBuffers.setReadPoint(absoluteKeyOffset, readPos); if (!isKeyLengthSmall) { // Read big value length we wrote with the value. - storedKeyLengthLength = writeBuffers.readVInt(readPos); - if (storedKeyLengthLength != keyLength) { - // LOG.debug("VectorMapJoinFastKeyStore equalKey no match big length"); + storedKeyLength = writeBuffers.readVInt(readPos); + if (storedKeyLength != keyLength) { return false; } } // Our reading is positioned to the key. if (!writeBuffers.isEqual(keyBytes, keyStart, readPos, keyLength)) { - // LOG.debug("VectorMapJoinFastKeyStore equalKey no match on bytes"); return false; } @@ -174,4 +171,23 @@ public long getEstimatedMemorySize() { size += unsafeReadPos == null ? 0 : unsafeReadPos.getEstimatedMemorySize(); return size; } + + public void getKey(long keyRefWord, ByteSegmentRef keyByteSegmentRef, + WriteBuffers.Position readPos) { + + int storedKeyLength = + (int) ((keyRefWord & SmallKeyLength.bitMask) >> SmallKeyLength.bitShift); + boolean isKeyLengthSmall = (storedKeyLength != SmallKeyLength.allBitsOn); + + long absoluteKeyOffset = + (keyRefWord & AbsoluteKeyOffset.bitMask); + + writeBuffers.setReadPoint(absoluteKeyOffset, readPos); + if (!isKeyLengthSmall) { + // Read the big key length we wrote with the key.
+ storedKeyLength = writeBuffers.readVInt(readPos); + } + writeBuffers.getByteSegmentRefToCurrent(keyByteSegmentRef, storedKeyLength, readPos); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java index f42430d..fc0a093 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java @@ -22,13 +22,17 @@ import org.apache.hadoop.hive.common.MemoryEstimate; import org.apache.hadoop.hive.ql.util.JavaDataModel; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +// import org.slf4j.Logger; +// import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; import org.apache.hadoop.io.BytesWritable; import org.apache.hive.common.util.HashCodeUtil; @@ -41,17 +45,117 @@ extends VectorMapJoinFastLongHashTable implements VectorMapJoinLongHashMap, MemoryEstimate { - public static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastLongHashMap.class); + // public static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastLongHashMap.class); + + private final boolean isSaveNullKeyValuesForFullOuter; protected VectorMapJoinFastValueStore valueStore; private BytesWritable testValueBytesWritable; + private long fullOuterNullKeyValueRef; + + private static class NonMatchedLongHashMapIterator extends VectorMapJoinFastNonMatchedIterator { + + private VectorMapJoinFastLongHashMap hashMap; + + private boolean noMore; + private boolean keyIsNull; + + private WriteBuffers.Position nonMatchedReadPos; + + private ByteSegmentRef nonMatchedKeyByteSegmentRef; + + private VectorMapJoinFastValueStore.HashMapResult nonMatchedHashMapResult; + + NonMatchedLongHashMapIterator(MatchTracker matchTracker, + VectorMapJoinFastLongHashMap hashMap) { + super(matchTracker); + this.hashMap = hashMap; + } + + @Override + public void init() { + super.init(); + noMore = false; + keyIsNull = false; + nonMatchedHashMapResult = new VectorMapJoinFastValueStore.HashMapResult(); + } + + @Override + public boolean findNextNonMatched() { + if (noMore) { + return false; + } + while (true) { + nonMatchedLogicalSlotNum++; + if (nonMatchedLogicalSlotNum >= hashMap.logicalHashBucketCount){ + + // Fall below and handle Small Table NULL key. + break; + } + final int nonMatchedDoubleIndex = nonMatchedLogicalSlotNum * 2; + if (hashMap.slotPairs[nonMatchedDoubleIndex] != 0) { + if (!matchTracker.wasMatched(nonMatchedLogicalSlotNum)) { + nonMatchedHashMapResult.set( + hashMap.valueStore, hashMap.slotPairs[nonMatchedDoubleIndex]); + keyIsNull = false; + return true; + } + } + } + + // Do we have a Small Table NULL Key? 
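The long-keyed iterator here ends with the same NULL-key tail as the bytes iterator above. Stepping back, the whole non-matched protocol a FULL OUTER consumer drives looks roughly like this; a hedged sketch only, built from the interfaces this patch adds (the emit step is a placeholder, not patch code):

import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTable;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator;
import org.apache.hadoop.hive.ql.metadata.HiveException;

// Sketch: after every Big Table batch has been probed with the tracking
// lookup(..., matchTracker), emit the Small Table entries nothing matched.
final class NonMatchedEmitSketch {
  static void emitNonMatched(VectorMapJoinHashTable hashTable, MatchTracker matchTracker)
      throws HiveException {
    VectorMapJoinNonMatchedIterator nonMatchedIterator =
        hashTable.createNonMatchedIterator(matchTracker);
    nonMatchedIterator.init();
    while (nonMatchedIterator.findNextNonMatched()) {
      // Chained Small Table values for this never-matched key.
      VectorMapJoinHashMapResult values = nonMatchedIterator.getNonMatchedHashMapResult();
      if (nonMatchedIterator.readNonMatchedLongKey()) {
        long smallTableKey = nonMatchedIterator.getNonMatchedLongKey();
        // Placeholder: emit one row per value in 'values' with key
        // 'smallTableKey' and every Big Table column set to NULL.
      } else {
        // The saved NULL-key tail: the Small Table key column is NULL too.
      }
    }
  }
}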
+ if (hashMap.fullOuterNullKeyValueRef == 0) { + return false; + } + nonMatchedHashMapResult.set( + hashMap.valueStore, hashMap.fullOuterNullKeyValueRef); + noMore = true; + keyIsNull = true; + return true; + } + + @Override + public boolean readNonMatchedLongKey() { + return !keyIsNull; + } + + @Override + public long getNonMatchedLongKey() { + return hashMap.slotPairs[nonMatchedLogicalSlotNum * 2 + 1]; + } + + @Override + public VectorMapJoinHashMapResult getNonMatchedHashMapResult() { + return nonMatchedHashMapResult; + } + } + @Override public VectorMapJoinHashMapResult createHashMapResult() { return new VectorMapJoinFastValueStore.HashMapResult(); } + @Override + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + return new NonMatchedLongHashMapIterator(matchTracker, this); + } + + @Override + public void putRow(BytesWritable currentKey, BytesWritable currentValue) + throws HiveException, IOException { + + if (!adaptPutRow(currentKey, currentValue)) { + + // Ignore NULL keys, except for FULL OUTER. + if (isSaveNullKeyValuesForFullOuter) { + addFullOuterNullKeyValue(currentValue); + } + + } + } + /* * A Unit Test convenience method for putting key and value into the hash table using the * actual types. @@ -91,13 +195,12 @@ public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable curre optimizedHashMapResult.forget(); long hashCode = HashCodeUtil.calculateLongHashCode(key); - // LOG.debug("VectorMapJoinFastLongHashMap lookup " + key + " hashCode " + hashCode); - long valueRef = findReadSlot(key, hashCode); + int pairIndex = findReadSlot(key, hashCode); JoinUtil.JoinResult joinResult; - if (valueRef == -1) { + if (pairIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { - optimizedHashMapResult.set(valueStore, valueRef); + optimizedHashMapResult.set(valueStore, slotPairs[pairIndex]); joinResult = JoinUtil.JoinResult.MATCH; } @@ -107,12 +210,74 @@ public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable curre return joinResult; } + @Override + public JoinUtil.JoinResult lookup(long key, VectorMapJoinHashMapResult hashMapResult, + MatchTracker matchTracker) { + + VectorMapJoinFastValueStore.HashMapResult optimizedHashMapResult = + (VectorMapJoinFastValueStore.HashMapResult) hashMapResult; + + optimizedHashMapResult.forget(); + + long hashCode = HashCodeUtil.calculateLongHashCode(key); + int pairIndex = findReadSlot(key, hashCode); + JoinUtil.JoinResult joinResult; + if (pairIndex == -1) { + joinResult = JoinUtil.JoinResult.NOMATCH; + } else { + if (matchTracker != null) { + matchTracker.trackMatch(pairIndex / 2); + } + optimizedHashMapResult.set(valueStore, slotPairs[pairIndex]); + + joinResult = JoinUtil.JoinResult.MATCH; + } + + optimizedHashMapResult.setJoinResult(joinResult); + + return joinResult; + } + + /* + * This variation is used by FULL OUTER INTERSECT MapJoin. It does key match tracking for + * intersect purposes but does not return Small Table values. 
+ */ + @Override + public boolean lookupNoResult(long key, WriteBuffers.Position readPos, MatchTracker matchTracker) { + + long hashCode = HashCodeUtil.calculateLongHashCode(key); + int pairIndex = findReadSlot(key, hashCode); + if (pairIndex != -1) { + matchTracker.trackMatch(pairIndex / 2); + } + return true; + } + + public void addFullOuterNullKeyValue(BytesWritable currentValue) { + + byte[] valueBytes = currentValue.getBytes(); + int valueLength = currentValue.getLength(); + + if (fullOuterNullKeyValueRef == 0) { + fullOuterNullKeyValueRef = valueStore.addFirst(valueBytes, 0, valueLength); + } else { + + // Add another value. + fullOuterNullKeyValueRef = + valueStore.addMore(fullOuterNullKeyValueRef, valueBytes, 0, valueLength); + } + } + public VectorMapJoinFastLongHashMap( - boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, + boolean minMaxEnabled, + boolean isSaveNullKeyValuesForFullOuter, + HashTableKeyType hashTableKeyType, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { - super(minMaxEnabled, isOuterJoin, hashTableKeyType, + super(minMaxEnabled, hashTableKeyType, initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); + this.isSaveNullKeyValuesForFullOuter = isSaveNullKeyValuesForFullOuter; valueStore = new VectorMapJoinFastValueStore(writeBuffersSize); + fullOuterNullKeyValueRef = 0; } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java index 228fa72..eda8a56 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java @@ -42,11 +42,29 @@ public static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastLongHashMultiSet.class); + private final boolean isSaveNullKeyValuesForFullOuter; + + private long fullOuterNullKeyValueCount; + @Override public VectorMapJoinHashMultiSetResult createHashMultiSetResult() { return new VectorMapJoinFastHashMultiSet.HashMultiSetResult(); } + @Override + public void putRow(BytesWritable currentKey, BytesWritable currentValue) + throws HiveException, IOException { + + if (!adaptPutRow(currentKey, currentValue)) { + + // Ignore NULL keys, except for FULL OUTER. + if (isSaveNullKeyValuesForFullOuter) { + fullOuterNullKeyValueCount++; + } + + } + } + /* * A Unit Test convenience method for putting the key into the hash table using the * actual type. @@ -80,12 +98,19 @@ public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable curre optimizedHashMultiSetResult.forget(); long hashCode = HashCodeUtil.calculateLongHashCode(key); - long count = findReadSlot(key, hashCode); + int pairIndex = findReadSlot(key, hashCode); JoinUtil.JoinResult joinResult; - if (count == -1) { + if (pairIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { - optimizedHashMultiSetResult.set(count); + /* + * NOTE: Support for trackMatched not needed yet for Set. 
+ + if (matchTracker != null) { + matchTracker.trackMatch(pairIndex / 2); + } + */ + optimizedHashMultiSetResult.set(slotPairs[pairIndex]); joinResult = JoinUtil.JoinResult.MATCH; } @@ -95,10 +120,14 @@ public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable curre } public VectorMapJoinFastLongHashMultiSet( - boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, + boolean minMaxEnabled, + boolean isSaveNullKeyValuesForFullOuter, + HashTableKeyType hashTableKeyType, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { - super(minMaxEnabled, isOuterJoin, hashTableKeyType, + super(minMaxEnabled, hashTableKeyType, initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); + this.isSaveNullKeyValuesForFullOuter = isSaveNullKeyValuesForFullOuter; + fullOuterNullKeyValueCount = 0; } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java index 4c049cb..14b1965 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java @@ -47,6 +47,14 @@ public VectorMapJoinHashSetResult createHashSetResult() { return new VectorMapJoinFastHashSet.HashSetResult(); } + @Override + public void putRow(BytesWritable currentKey, BytesWritable currentValue) + throws HiveException, IOException { + + // Ignore NULL keys (HashSet not used for FULL OUTER). + adaptPutRow(currentKey, currentValue); + } + /* * A Unit Test convenience method for putting the key into the hash table using the * actual type. @@ -76,11 +84,18 @@ public JoinResult contains(long key, VectorMapJoinHashSetResult hashSetResult) { optimizedHashSetResult.forget(); long hashCode = HashCodeUtil.calculateLongHashCode(key); - long existance = findReadSlot(key, hashCode); + int pairIndex = findReadSlot(key, hashCode); JoinUtil.JoinResult joinResult; - if (existance == -1) { + if (pairIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { + /* + * NOTE: Support for trackMatched not needed yet for Set. 
+ + if (matchTracker != null) { + matchTracker.trackMatch(pairIndex / 2); + } + */ joinResult = JoinUtil.JoinResult.MATCH; } @@ -91,9 +106,10 @@ public JoinResult contains(long key, VectorMapJoinHashSetResult hashSetResult) { } public VectorMapJoinFastLongHashSet( - boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, + boolean minMaxEnabled, + HashTableKeyType hashTableKeyType, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { - super(minMaxEnabled, isOuterJoin, hashTableKeyType, + super(minMaxEnabled, hashTableKeyType, initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java index c9c3e80..8b775fe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java @@ -49,8 +49,6 @@ private final HashTableKeyType hashTableKeyType; - private final boolean isOuterJoin; - private final BinarySortableDeserializeRead keyBinarySortableDeserializeRead; private final boolean useMinMax; @@ -72,14 +70,13 @@ public long max() { return max; } - @Override - public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { + public boolean adaptPutRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { byte[] keyBytes = currentKey.getBytes(); int keyLength = currentKey.getLength(); keyBinarySortableDeserializeRead.set(keyBytes, 0, keyLength); try { if (!keyBinarySortableDeserializeRead.readNextField()) { - return; + return false; } } catch (Exception e) { throw new HiveException( @@ -92,6 +89,7 @@ public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws keyBinarySortableDeserializeRead, hashTableKeyType); add(key, currentValue); + return true; } protected abstract void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue); @@ -215,10 +213,9 @@ private void expandAndRehash() { largestNumberOfSteps = newLargestNumberOfSteps; resizeThreshold = (int)(logicalHashBucketCount * loadFactor); metricExpands++; - // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands); } - protected long findReadSlot(long key, long hashCode) { + protected int findReadSlot(long key, long hashCode) { int intHashCode = (int) hashCode; int slot = intHashCode & logicalHashBucketMask; @@ -230,20 +227,16 @@ protected long findReadSlot(long key, long hashCode) { long valueRef = slotPairs[pairIndex]; if (valueRef == 0) { // Given that we do not delete, an empty slot means no match. - // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " empty slot (i = " + i + ")"); return -1; } long tableKey = slotPairs[pairIndex + 1]; if (key == tableKey) { - // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " found key (i = " + i + ")"); - return slotPairs[pairIndex]; + return pairIndex; } // Some other key (collision) - keep probing. 
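The probing below advances with probeSlot += (++i), so it visits the home slot plus triangular-number offsets 1, 3, 6, ... (masked back into range), and a read can give up as soon as i exceeds largestNumberOfSteps, the deepest any insert ever probed. The same sequence as a standalone sketch (assumes the power-of-two bucket count these tables use):

// Standalone restatement of the probe order used by findReadSlot above.
final class ProbeSequenceSketch {
  static int[] firstSlots(long hashCode, int logicalHashBucketMask, int steps) {
    int[] slots = new int[steps + 1];
    int slot = (int) hashCode & logicalHashBucketMask;
    long probeSlot = slot;
    slots[0] = slot;                       // the home slot
    for (int i = 1; i <= steps; i++) {
      probeSlot += i;                      // mirrors probeSlot += (++i)
      slots[i] = (int) (probeSlot & logicalHashBucketMask);
    }
    return slots;
  }
}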
probeSlot += (++i); if (i > largestNumberOfSteps) { - // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot returning not found"); // We know we never went that far when we were inserting. - // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " largestNumberOfSteps " + largestNumberOfSteps + " (i = " + i + ")"); return -1; } slot = (int)(probeSlot & logicalHashBucketMask); @@ -268,10 +261,10 @@ private void allocateBucketArray() { } public VectorMapJoinFastLongHashTable( - boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, + boolean minMaxEnabled, + HashTableKeyType hashTableKeyType, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); - this.isOuterJoin = isOuterJoin; this.hashTableKeyType = hashTableKeyType; PrimitiveTypeInfo[] primitiveTypeInfos = { hashTableKeyType.getPrimitiveTypeInfo() }; keyBinarySortableDeserializeRead = diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java index 2798010..4a63772 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java @@ -49,8 +49,8 @@ public void testPutRow(byte[] currentKey, byte[] currentValue) throws HiveExcept } public VectorMapJoinFastMultiKeyHashMap( - boolean isOuterJoin, - int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { + boolean isSaveNullKeyValuesForFullOuter, + int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java index 0560281..31aa95f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java @@ -47,8 +47,8 @@ public void testPutRow(byte[] currentKey) throws HiveException, IOException { } public VectorMapJoinFastMultiKeyHashMultiSet( - boolean isOuterJoin, - int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { + boolean isSaveNullKeyValuesForFullOuter, + int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java index 900ca55..ed8b989 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java @@ -47,8 +47,7 @@ public void testPutRow(byte[] currentKey) throws HiveException, IOException { } public VectorMapJoinFastMultiKeyHashSet( - boolean isOuterJoin, - int initialCapacity, float loadFactor, 
int writeBuffersSize, long estimatedKeyCount) { + int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastNonMatchedIterator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastNonMatchedIterator.java new file mode 100644 index 0000000..3d29cf4 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastNonMatchedIterator.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; + +/** + * The abstract class for vectorized non-match Small Table key iteration. + */ +public abstract class VectorMapJoinFastNonMatchedIterator + extends VectorMapJoinNonMatchedIterator { + + protected int nonMatchedLogicalSlotNum; + + public VectorMapJoinFastNonMatchedIterator(MatchTracker matchTracker) { + super(matchTracker); + } + + @Override + public void init() { + nonMatchedLogicalSlotNum = -1; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java index 777eb45..1b108a8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java @@ -35,11 +35,9 @@ public static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastStringCommon.class); - private boolean isOuterJoin; - private BinarySortableDeserializeRead keyBinarySortableDeserializeRead; - public void adaptPutRow(VectorMapJoinFastBytesHashTable hashTable, + public boolean adaptPutRow(VectorMapJoinFastBytesHashTable hashTable, BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { byte[] keyBytes = currentKey.getBytes(); @@ -47,7 +45,7 @@ public void adaptPutRow(VectorMapJoinFastBytesHashTable hashTable, keyBinarySortableDeserializeRead.set(keyBytes, 0, keyLength); try { if (!keyBinarySortableDeserializeRead.readNextField()) { - return; + return false; } } catch (Exception e) { throw new HiveException( @@ -61,14 +59,14 @@ public void adaptPutRow(VectorMapJoinFastBytesHashTable hashTable, keyBinarySortableDeserializeRead.currentBytesStart, keyBinarySortableDeserializeRead.currentBytesLength, currentValue); + return true; } - public 
VectorMapJoinFastStringCommon(boolean isOuterJoin) { - this.isOuterJoin = isOuterJoin; + public VectorMapJoinFastStringCommon() { PrimitiveTypeInfo[] primitiveTypeInfos = { TypeInfoFactory.stringTypeInfo }; keyBinarySortableDeserializeRead = new BinarySortableDeserializeRead( primitiveTypeInfos, /* useExternalBuffer */ false); } -} \ No newline at end of file +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java index fc4edda..56068f3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java @@ -30,18 +30,27 @@ */ public class VectorMapJoinFastStringHashMap extends VectorMapJoinFastBytesHashMap { + private final boolean isSaveNullKeyValuesForFullOuter; + private VectorMapJoinFastStringCommon stringCommon; @Override public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { - stringCommon.adaptPutRow(this, currentKey, currentValue); + if (!stringCommon.adaptPutRow(this, currentKey, currentValue)) { + + // Ignore NULL keys, except for FULL OUTER. + if (isSaveNullKeyValuesForFullOuter) { + addFullOuterNullKeyValue(currentValue); + } + } } public VectorMapJoinFastStringHashMap( - boolean isOuterJoin, + boolean isSaveNullKeyValuesForFullOuter, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); - stringCommon = new VectorMapJoinFastStringCommon(isOuterJoin); + this.isSaveNullKeyValuesForFullOuter = isSaveNullKeyValuesForFullOuter; + stringCommon = new VectorMapJoinFastStringCommon(); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java index 3dbdfa7..911a61e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java @@ -30,18 +30,30 @@ */ public class VectorMapJoinFastStringHashMultiSet extends VectorMapJoinFastBytesHashMultiSet { - private VectorMapJoinFastStringCommon stringCommon; + private final boolean isSaveNullKeyValuesForFullOuter; + + private final VectorMapJoinFastStringCommon stringCommon; + + private long fullOuterNullKeyValueCount; @Override public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { - stringCommon.adaptPutRow(this, currentKey, currentValue); + if (!stringCommon.adaptPutRow(this, currentKey, currentValue)) { + + // Ignore NULL keys, except for FULL OUTER. 
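Once adaptPutRow() reports false (a NULL key), the same three-way policy recurs across the Long and String variants: HASH_MAP saves the NULL-key values, HASH_MULTISET merely counts them, and HASH_SET ignores them since sets are not used for FULL OUTER. Condensed into one illustrative helper (not a class in this patch; HashTableKind is the existing plan enum the table container below switches on):

import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind;

// Illustrative summary of the per-kind NULL-key policy; the real logic
// lives in each table's putRow() override.
final class NullKeyPolicySketch {
  enum NullKeyPolicy { SAVE_VALUES, COUNT_ONLY, IGNORE }

  static NullKeyPolicy policyFor(boolean isSaveNullKeyValuesForFullOuter, HashTableKind kind) {
    if (!isSaveNullKeyValuesForFullOuter || kind == HashTableKind.HASH_SET) {
      return NullKeyPolicy.IGNORE;
    }
    return kind == HashTableKind.HASH_MAP
        ? NullKeyPolicy.SAVE_VALUES       // addFullOuterNullKeyValue(...)
        : NullKeyPolicy.COUNT_ONLY;       // fullOuterNullKeyValueCount++
  }
}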
+ if (isSaveNullKeyValuesForFullOuter) { + fullOuterNullKeyValueCount++; + } + } } public VectorMapJoinFastStringHashMultiSet( - boolean isOuterJoin, + boolean isSaveNullKeyValuesForFullOuter, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); - stringCommon = new VectorMapJoinFastStringCommon(isOuterJoin); + this.isSaveNullKeyValuesForFullOuter = isSaveNullKeyValuesForFullOuter; + fullOuterNullKeyValueCount = 0; + stringCommon = new VectorMapJoinFastStringCommon(); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java index 84f8439..3dc7847 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java @@ -33,15 +33,17 @@ private VectorMapJoinFastStringCommon stringCommon; @Override - public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { + public void putRow(BytesWritable currentKey, BytesWritable currentValue) + throws HiveException, IOException { + + // Ignore NULL keys (HashSet not used for FULL OUTER). stringCommon.adaptPutRow(this, currentKey, currentValue); } public VectorMapJoinFastStringHashSet( - boolean isOuterJoin, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); - stringCommon = new VectorMapJoinFastStringCommon(isOuterJoin); + stringCommon = new VectorMapJoinFastStringCommon(); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java index 24dfa5d..3e41ec0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java @@ -27,6 +27,8 @@ import org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTable; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinTableContainer; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -73,11 +75,6 @@ public VectorMapJoinFastTableContainer(MapJoinDesc desc, Configuration hconf, this.estimatedKeyCount = estimatedKeyCount; - // LOG.info("VectorMapJoinFastTableContainer load keyCountAdj " + keyCountAdj); - // LOG.info("VectorMapJoinFastTableContainer load threshold " + threshold); - // LOG.info("VectorMapJoinFastTableContainer load loadFactor " + loadFactor); - // LOG.info("VectorMapJoinFastTableContainer load wbSize " + wbSize); - int newThreshold = HashMapWrapper.calculateTableSize( keyCountAdj, threshold, loadFactor, estimatedKeyCount); @@ -93,13 +90,11 @@ public VectorMapJoinHashTable vectorMapJoinHashTable() { private 
VectorMapJoinFastHashTable createHashTable(int newThreshold) { - boolean isOuterJoin = !desc.isNoOuterJoin(); - - // UNDONE VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc(); HashTableImplementationType hashTableImplementationType = vectorDesc.getHashTableImplementationType(); HashTableKind hashTableKind = vectorDesc.getHashTableKind(); HashTableKeyType hashTableKeyType = vectorDesc.getHashTableKeyType(); + boolean isSaveNullKeyValuesForFullOuter = vectorDesc.getIsSaveNullKeyValuesForFullOuter(); boolean minMaxEnabled = vectorDesc.getMinMaxEnabled(); int writeBufferSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEWBSIZE); @@ -115,18 +110,23 @@ private VectorMapJoinFastHashTable createHashTable(int newThreshold) { switch (hashTableKind) { case HASH_MAP: hashTable = new VectorMapJoinFastLongHashMap( - minMaxEnabled, isOuterJoin, hashTableKeyType, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + minMaxEnabled, + isSaveNullKeyValuesForFullOuter, + hashTableKeyType, + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; case HASH_MULTISET: hashTable = new VectorMapJoinFastLongHashMultiSet( - minMaxEnabled, isOuterJoin, hashTableKeyType, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + minMaxEnabled, + isSaveNullKeyValuesForFullOuter, + hashTableKeyType, + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; case HASH_SET: hashTable = new VectorMapJoinFastLongHashSet( - minMaxEnabled, isOuterJoin, hashTableKeyType, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + minMaxEnabled, + hashTableKeyType, + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; } break; @@ -135,18 +135,17 @@ private VectorMapJoinFastHashTable createHashTable(int newThreshold) { switch (hashTableKind) { case HASH_MAP: hashTable = new VectorMapJoinFastStringHashMap( - isOuterJoin, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + isSaveNullKeyValuesForFullOuter, + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; case HASH_MULTISET: hashTable = new VectorMapJoinFastStringHashMultiSet( - isOuterJoin, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + isSaveNullKeyValuesForFullOuter, + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; case HASH_SET: hashTable = new VectorMapJoinFastStringHashSet( - isOuterJoin, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; } break; @@ -155,18 +154,17 @@ private VectorMapJoinFastHashTable createHashTable(int newThreshold) { switch (hashTableKind) { case HASH_MAP: hashTable = new VectorMapJoinFastMultiKeyHashMap( - isOuterJoin, + isSaveNullKeyValuesForFullOuter, newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; case HASH_MULTISET: hashTable = new VectorMapJoinFastMultiKeyHashMultiSet( - isOuterJoin, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + isSaveNullKeyValuesForFullOuter, + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; case HASH_SET: hashTable = new VectorMapJoinFastMultiKeyHashSet( - isOuterJoin, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; } break; @@ -195,6 +193,12 @@ public ReusableGetAdaptor createGetter(MapJoinKey keyTypeFromLoader) { } @Override + public NonMatchedSmallTableIterator createNonMatchedSmallTableIterator( + 
MatchTracker matchTracker) { + throw new RuntimeException("Not applicable"); + } + + @Override public void clear() { // Do nothing } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinBytesHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinBytesHashMap.java index 2408484..a530cca 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinBytesHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinBytesHashMap.java @@ -21,6 +21,8 @@ import java.io.IOException; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.serde2.WriteBuffers; /* * The interface for a single byte array key hash map lookup method. @@ -41,6 +43,9 @@ * The object to receive small table value(s) information on a MATCH. * Or, for SPILL, it has information on where to spill the big table row. * + * NOTE: Since the hash table can be shared, the hashMapResult serves as the non-shared + * private object for our accessing the hash table lookup values, etc. + * * @return * Whether the lookup was a match, no match, or spill (the partition with the key * is currently spilled). @@ -48,4 +53,31 @@ JoinUtil.JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, VectorMapJoinHashMapResult hashMapResult) throws IOException; + /* + * A version of lookup with match tracking. + * ... + * * @param matchTracker + * Optional key match tracking. + * + * NOTE: Since the hash table can be shared, the matchTracker serves as the non-shared + * private object for tracking our key matches in the hash table. + * ... + */ + JoinUtil.JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, + VectorMapJoinHashMapResult hashMapResult, MatchTracker matchTracker) throws IOException; + + /* + * This variation is used by FULL OUTER INTERSECT MapJoin. It does key match tracking for + * intersect purposes but does not return Small Table values. + * + * Lookup a byte array key in the hash map for key match tracking purposes; no result. + * ... + * @param readPos + * @param matchTracker + * NOTE: Since the hash table can be shared, the readPos and matchTracker serve as + * non-shared private objects for looking up and tracking key matches in the hash table. + * + */ + boolean lookupNoResult(byte[] keyBytes, int keyStart, int keyLength, + WriteBuffers.Position readPos, MatchTracker matchTracker) throws IOException; } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMap.java index 2d2490c..5762cff 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMap.java @@ -30,5 +30,4 @@ * access spill information when the partition with the key is currently spilled. 
*/ VectorMapJoinHashMapResult createHashMapResult(); - } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashTable.java index e49da04..ce5c597 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashTable.java @@ -21,6 +21,7 @@ import java.io.IOException; import org.apache.hadoop.hive.common.MemoryEstimate; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.io.BytesWritable; @@ -31,7 +32,6 @@ */ public interface VectorMapJoinHashTable extends MemoryEstimate { - /* * @param currentKey * The current key. @@ -45,4 +45,10 @@ void putRow(BytesWritable currentKey, BytesWritable currentValue) * Get hash table size */ int size(); + + MatchTracker createMatchTracker(); + + VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker); + + int spillPartitionId(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashMap.java index ba68d35..a54e501 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashMap.java @@ -21,6 +21,8 @@ import java.io.IOException; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.serde2.WriteBuffers; /* * The interface for a single long key hash map lookup method. @@ -43,4 +45,31 @@ */ JoinUtil.JoinResult lookup(long key, VectorMapJoinHashMapResult hashMapResult) throws IOException; + /* + * A version of lookup with match tracking. + * ... + * @param matchTracker + * Optional key match tracking. + * + * NOTE: Since the hash table can be shared, the matchTracker serves as the non-shared + * private object for tracking our key matches in the hash table. + * ... + */ + JoinUtil.JoinResult lookup(long key, VectorMapJoinHashMapResult hashMapResult, + MatchTracker matchTracker) throws IOException; + + /* + * This variation is used by FULL OUTER INTERSECT MapJoin. It does key match tracking for + * intersect purposes but does not return Small Table values. + * + * Lookup a long key in the hash map for key match tracking purposes; no result. + * ... + * @param readPos + * @param matchTracker + * NOTE: Since the hash table can be shared, the readPos and matchTracker serve as + * non-shared private objects for looking up and tracking key matches in the hash table. + * ... 
+ */ + boolean lookupNoResult(long key, WriteBuffers.Position readPos, MatchTracker matchTracker) + throws IOException; } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashTable.java index d0f9dcb..74cfb9c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashTable.java @@ -27,5 +27,4 @@ boolean useMinMax(); long min(); long max(); - } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinNonMatchedIterator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinNonMatchedIterator.java new file mode 100644 index 0000000..911485b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinNonMatchedIterator.java @@ -0,0 +1,72 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable; + +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.ql.metadata.HiveException; + +/** + * The abstract class for vectorized non-match Small Table key iteration. 
+ */ +public abstract class VectorMapJoinNonMatchedIterator { + + protected final MatchTracker matchTracker; + + protected int nonMatchedLogicalSlotNum; + + public VectorMapJoinNonMatchedIterator(MatchTracker matchTracker) { + this.matchTracker = matchTracker; + } + + public void init() { + nonMatchedLogicalSlotNum = -1; + } + + public boolean findNextNonMatched() { + throw new RuntimeException("Not implemented"); + } + + public boolean readNonMatchedLongKey() throws HiveException { + throw new RuntimeException("Not implemented"); + } + + public long getNonMatchedLongKey() throws HiveException { + throw new RuntimeException("Not implemented"); + } + + public boolean readNonMatchedBytesKey() throws HiveException { + throw new RuntimeException("Not implemented"); + } + + public byte[] getNonMatchedBytes() { + throw new RuntimeException("Not implemented"); + } + + public int getNonMatchedBytesOffset() { + throw new RuntimeException("Not implemented"); + } + + public int getNonMatchedBytesLength() { + throw new RuntimeException("Not implemented"); + } + + public VectorMapJoinHashMapResult getNonMatchedHashMapResult() { + throw new RuntimeException("Not implemented"); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java index f95cd76..21c355c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java @@ -116,16 +116,4 @@ public static VectorMapJoinOptimizedHashTable createHashTable(MapJoinDesc desc, } return hashTable; } - - /* - @Override - public com.esotericsoftware.kryo.io.Output getHybridBigTableSpillOutput(int partitionId) { - - HybridHashTableContainer ht = (HybridHashTableContainer) mapJoinTableContainer; - - HashPartition hp = ht.getHashPartitions()[partitionId]; - - return hp.getMatchfileOutput(); - } - */ } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java index 9242702..b682449 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java @@ -23,10 +23,14 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.WriteBuffers; import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; public class VectorMapJoinOptimizedHashMap @@ -40,13 +44,18 @@ 
public VectorMapJoinHashMapResult createHashMapResult() { public static class HashMapResult extends VectorMapJoinHashMapResult { - private BytesBytesMultiHashMap.Result bytesBytesMultiHashMapResult; + private final BytesBytesMultiHashMap.Result bytesBytesMultiHashMapResult; public HashMapResult() { super(); bytesBytesMultiHashMapResult = new BytesBytesMultiHashMap.Result(); } + public HashMapResult(BytesBytesMultiHashMap.Result bytesBytesMultiHashMapResult) { + super(); + this.bytesBytesMultiHashMapResult = bytesBytesMultiHashMapResult; + } + public BytesBytesMultiHashMap.Result bytesBytesMultiHashMapResult() { return bytesBytesMultiHashMapResult; } @@ -106,7 +115,59 @@ public String toString() { public String getDetailedHashMapResultPositionString() { return "(Not supported yet)"; } - } + } + + protected static class NonMatchedBytesHashMapIterator + extends VectorMapJoinOptimizedNonMatchedIterator { + + private VectorMapJoinOptimizedHashMap hashMap; + + protected ByteSegmentRef keyRef; + + public NonMatchedBytesHashMapIterator(MatchTracker matchTracker, + VectorMapJoinOptimizedHashMap hashMap) { + super(matchTracker); + this.hashMap = hashMap; + } + + @Override + public void init() { + super.init(); + nonMatchedIterator = + ((MapJoinTableContainer) hashMap.originalTableContainer). + createNonMatchedSmallTableIterator(matchTracker); + } + + public void doReadNonMatchedBytesKey() throws HiveException { + keyRef = nonMatchedIterator.getCurrentKeyAsRef(); + } + + @Override + public boolean readNonMatchedBytesKey() throws HiveException { + doReadNonMatchedBytesKey(); + return true; // We have not interpreted the bytes, so return true. + } + + @Override + public byte[] getNonMatchedBytes() { + return keyRef.getBytes(); + } + + @Override + public int getNonMatchedBytesOffset() { + return (int) keyRef.getOffset(); + } + + @Override + public int getNonMatchedBytesLength() { + return keyRef.getLength(); + } + } + + @Override + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + return new NonMatchedBytesHashMapIterator(matchTracker, this); + } @Override public JoinUtil.JoinResult lookup(byte[] keyBytes, int keyOffset, int keyLength, @@ -117,11 +178,36 @@ public String getDetailedHashMapResultPositionString() { JoinUtil.JoinResult joinResult = doLookup(keyBytes, keyOffset, keyLength, implementationHashMapResult.bytesBytesMultiHashMapResult(), - (VectorMapJoinHashTableResult) hashMapResult); + (VectorMapJoinHashTableResult) hashMapResult, null); return joinResult; } + @Override + public JoinUtil.JoinResult lookup(byte[] keyBytes, int keyOffset, int keyLength, + VectorMapJoinHashMapResult hashMapResult, MatchTracker matchTracker) throws IOException { + + HashMapResult implementationHashMapResult = (HashMapResult) hashMapResult; + + JoinUtil.JoinResult joinResult = + doLookup(keyBytes, keyOffset, keyLength, + implementationHashMapResult.bytesBytesMultiHashMapResult(), + (VectorMapJoinHashTableResult) hashMapResult, matchTracker); + + return joinResult; + } + + /* + * This variation is used by FULL OUTER INTERSECT MapJoin. It does key match tracking for + * intersect purposes but does not return Small Table values. 
+ */ + @Override + public boolean lookupNoResult(byte[] keyBytes, int keyOffset, int keyLength, + WriteBuffers.Position readPos, MatchTracker matchTracker) throws IOException { + + return doLookupNoResult(keyBytes, keyOffset, keyLength, readPos, matchTracker); + } + public VectorMapJoinOptimizedHashMap( MapJoinTableContainer originalTableContainer, ReusableGetAdaptor hashMapRowGetter) { super(originalTableContainer, hashMapRowGetter); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMultiSet.java index 9921a88..cfe128c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMultiSet.java @@ -91,7 +91,7 @@ public void forget() { JoinUtil.JoinResult joinResult = doLookup(keyBytes, keyOffset, keyLength, implementationHashMultiSetResult.bytesBytesMultiHashMapResult(), - (VectorMapJoinHashTableResult) hashMultiSetResult); + (VectorMapJoinHashTableResult) hashMultiSetResult, null); return joinResult; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashSet.java index 122f881..8f53ada 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashSet.java @@ -66,7 +66,7 @@ public void forget() { JoinUtil.JoinResult joinResult = doLookup(keyBytes, keyOffset, keyLength, implementationHashSetResult.bytesBytesMultiHashMapResult(), - (VectorMapJoinHashTableResult) hashSetResult); + (VectorMapJoinHashTableResult) hashSetResult, null); return joinResult; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashTable.java index 74887f7..4efe1da 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashTable.java @@ -26,21 +26,26 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerDirectAccess; import org.apache.hadoop.hive.ql.exec.persistence.ReusableGetAdaptorDirectAccess; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashTable; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTable; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.WriteBuffers; import 
org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; -/* +/** * Root interface for a vector map join hash table (which could be a hash map, hash multi-set, or * hash set). */ -public abstract class VectorMapJoinOptimizedHashTable implements VectorMapJoinHashTable { +public abstract class VectorMapJoinOptimizedHashTable + implements VectorMapJoinHashTable, VectorMapJoinBytesHashTable { private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinOptimizedMultiKeyHashMap.class.getName()); @@ -55,6 +60,16 @@ } @Override + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + throw new RuntimeException("Not implemented"); + } + + @Override + public int spillPartitionId() { + return adapatorDirectAccess.directSpillPartitionId(); + } + + @Override public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws SerDeException, HiveException, IOException { @@ -69,13 +84,13 @@ protected void putRowInternal(BytesWritable key, BytesWritable value) public JoinUtil.JoinResult doLookup(byte[] keyBytes, int keyOffset, int keyLength, BytesBytesMultiHashMap.Result bytesBytesMultiHashMapResult, - VectorMapJoinHashTableResult hashTableResult) { + VectorMapJoinHashTableResult hashTableResult, MatchTracker matchTracker) { hashTableResult.forget(); JoinUtil.JoinResult joinResult = adapatorDirectAccess.setDirect(keyBytes, keyOffset, keyLength, - bytesBytesMultiHashMapResult); + bytesBytesMultiHashMapResult, matchTracker); if (joinResult == JoinUtil.JoinResult.SPILL) { hashTableResult.setSpillPartitionId(adapatorDirectAccess.directSpillPartitionId()); } @@ -85,6 +100,13 @@ protected void putRowInternal(BytesWritable key, BytesWritable value) return joinResult; } + public boolean doLookupNoResult(byte[] keyBytes, int keyOffset, int keyLength, + WriteBuffers.Position readPos, MatchTracker matchTracker) { + + return adapatorDirectAccess.setDirectNoResult( + keyBytes, keyOffset, keyLength, readPos, matchTracker); + } + public VectorMapJoinOptimizedHashTable( MapJoinTableContainer originalTableContainer, ReusableGetAdaptor hashMapRowGetter) { @@ -105,4 +127,9 @@ public long getEstimatedMemorySize() { size += (2 * JavaDataModel.get().object()); return size; } + + @Override + public MatchTracker createMatchTracker() { + return adapatorDirectAccess.createMatchTracker(); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java index 9c45ed9..de1ee15 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java @@ -41,8 +41,6 @@ private HashTableKeyType hashTableKeyType; - // private BinarySortableDeserializeRead keyBinarySortableDeserializeRead; - private BinarySortableSerializeWrite keyBinarySortableSerializeWrite; private transient Output output; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java index b21f0b3..895593f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java @@ -23,9 +23,17 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; /* * An single long value hash map based on the BytesBytesMultiHashMap. @@ -37,8 +45,104 @@ extends VectorMapJoinOptimizedHashMap implements VectorMapJoinLongHashMap { + private HashTableKeyType hashTableKeyType; + private VectorMapJoinOptimizedLongCommon longCommon; + private static class NonMatchedLongHashMapIterator + extends VectorMapJoinOptimizedNonMatchedIterator { + + private VectorMapJoinOptimizedLongHashMap hashMap; + + // Extract long with non-shared deserializer object. + private BinarySortableDeserializeRead keyBinarySortableDeserializeRead; + + private long longValue; + + NonMatchedLongHashMapIterator(MatchTracker matchTracker, + VectorMapJoinOptimizedLongHashMap hashMap) { + super(matchTracker); + this.hashMap = hashMap; + } + + @Override + public void init() { + super.init(); + nonMatchedIterator = + ((MapJoinTableContainer) hashMap.originalTableContainer). + createNonMatchedSmallTableIterator(matchTracker); + + TypeInfo integerTypeInfo; + switch (hashMap.hashTableKeyType) { + case BOOLEAN: + integerTypeInfo = TypeInfoFactory.booleanTypeInfo; + break; + case BYTE: + integerTypeInfo = TypeInfoFactory.byteTypeInfo; + break; + case SHORT: + integerTypeInfo = TypeInfoFactory.shortTypeInfo; + break; + case INT: + integerTypeInfo = TypeInfoFactory.intTypeInfo; + break; + case LONG: + integerTypeInfo = TypeInfoFactory.longTypeInfo; + break; + default: + throw new RuntimeException("Unexpected key type " + hashMap.hashTableKeyType); + } + keyBinarySortableDeserializeRead = + new BinarySortableDeserializeRead( + new TypeInfo[] {integerTypeInfo}, false); + } + + private boolean readNonMatchedLongKey(ByteSegmentRef keyRef) throws HiveException { + + try { + byte[] keyBytes = keyRef.getBytes(); + int keyOffset = (int) keyRef.getOffset(); + int keyLength = keyRef.getLength(); + keyBinarySortableDeserializeRead.set(keyBytes, keyOffset, keyLength); + if (!keyBinarySortableDeserializeRead.readNextField()) { + return false; + } + switch (hashMap.hashTableKeyType) { + case BOOLEAN: + longValue = keyBinarySortableDeserializeRead.currentBoolean ? 
1 : 0; + break; + case BYTE: + longValue = keyBinarySortableDeserializeRead.currentByte; + break; + case SHORT: + longValue = keyBinarySortableDeserializeRead.currentShort; + break; + case INT: + longValue = keyBinarySortableDeserializeRead.currentInt; + break; + case LONG: + longValue = keyBinarySortableDeserializeRead.currentLong; + break; + default: + throw new RuntimeException("Unexpected key type " + hashMap.hashTableKeyType); + } + } catch (IOException e) { + throw new HiveException(e); + } + return true; + } + + @Override + public boolean readNonMatchedLongKey() throws HiveException { + return readNonMatchedLongKey(nonMatchedIterator.getCurrentKeyAsRef()); + } + + @Override + public long getNonMatchedLongKey() throws HiveException { + return longValue; + } + } + @Override public boolean useMinMax() { return longCommon.useMinMax(); @@ -54,14 +158,10 @@ public long max() { return longCommon.max(); } - /* @Override - public void putRow(BytesWritable currentKey, BytesWritable currentValue) - throws SerDeException, HiveException, IOException { - - longCommon.adaptPutRow((VectorMapJoinOptimizedHashTable) this, currentKey, currentValue); + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + return new NonMatchedLongHashMapIterator(matchTracker, this); } - */ @Override public JoinResult lookup(long key, @@ -73,10 +173,36 @@ public JoinResult lookup(long key, hashMapResult); } + @Override + public JoinResult lookup(long key, + VectorMapJoinHashMapResult hashMapResult, MatchTracker matchTracker) throws IOException { + + SerializedBytes serializedBytes = longCommon.serialize(key); + + return super.lookup(serializedBytes.bytes, serializedBytes.offset, serializedBytes.length, + hashMapResult, matchTracker); + } + + /* + * This variation is used by FULL OUTER INTERSECT MapJoin. It does key match tracking for + * intersect purposes but does not return Small Table values. + */ + @Override + public boolean lookupNoResult(long key, WriteBuffers.Position readPos, + MatchTracker matchTracker) throws IOException { + + SerializedBytes serializedBytes = longCommon.serialize(key); + + return super.lookupNoResult( + serializedBytes.bytes, serializedBytes.offset, serializedBytes.length, + readPos, matchTracker); + } + public VectorMapJoinOptimizedLongHashMap( boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, MapJoinTableContainer originalTableContainer, ReusableGetAdaptor hashMapRowGetter) { super(originalTableContainer, hashMapRowGetter); + this.hashTableKeyType = hashTableKeyType; longCommon = new VectorMapJoinOptimizedLongCommon(minMaxEnabled, isOuterJoin, hashTableKeyType); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedMultiKeyHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedMultiKeyHashMap.java index 3e8e6fb..e07bbaa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedMultiKeyHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedMultiKeyHashMap.java @@ -27,8 +27,6 @@ public class VectorMapJoinOptimizedMultiKeyHashMap extends VectorMapJoinOptimizedHashMap { - // UNDONE: How to look for all NULLs in a multi-key????? Let nulls through for now. 
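(Aside: the BinarySortable round trip that NonMatchedLongHashMapIterator depends on can be exercised in isolation. A minimal sketch follows; it is not part of the patch, uses only the BinarySortableSerializeWrite/BinarySortableDeserializeRead calls already appearing above, and its class name and sample key value are illustrative.)

import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

// Illustrative sketch: round-trip one LONG key through BinarySortable, the
// same encode/decode pair used by the long key hash map code above.
public class LongKeyRoundTripSketch {
  public static void main(String[] args) throws Exception {
    // Encode a single LONG key, as VectorMapJoinOptimizedLongCommon.serialize does.
    BinarySortableSerializeWrite serializeWrite = new BinarySortableSerializeWrite(1);
    Output output = new Output();
    serializeWrite.set(output);
    serializeWrite.writeLong(-6187919478609154811L);

    // Decode it back, as NonMatchedLongHashMapIterator.readNonMatchedLongKey does.
    BinarySortableDeserializeRead deserializeRead =
        new BinarySortableDeserializeRead(
            new TypeInfo[] {TypeInfoFactory.longTypeInfo}, /* useExternalBuffer */ false);
    deserializeRead.set(output.getData(), 0, output.getLength());
    if (deserializeRead.readNextField()) {
      System.out.println(deserializeRead.currentLong);  // -6187919478609154811
    }
  }
}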
- public VectorMapJoinOptimizedMultiKeyHashMap(boolean isOuterJoin, MapJoinTableContainer originalTableContainer, ReusableGetAdaptor hashMapRowGetter) { super(originalTableContainer, hashMapRowGetter); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedNonMatchedIterator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedNonMatchedIterator.java new file mode 100644 index 0000000..6f9c770 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedNonMatchedIterator.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.optimized; + +import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.optimized.VectorMapJoinOptimizedHashMap.HashMapResult; + +/** + * The abstract class for vectorized non-match Small Table key iteration. + */ +public abstract class VectorMapJoinOptimizedNonMatchedIterator + extends VectorMapJoinNonMatchedIterator { + + protected NonMatchedSmallTableIterator nonMatchedIterator; + + protected HashMapResult nonMatchedHashMapResult; + + public VectorMapJoinOptimizedNonMatchedIterator(MatchTracker matchTracker) { + super(matchTracker); + } + + @Override + public boolean findNextNonMatched() { + return nonMatchedIterator.isNext(); + } + + @Override + public VectorMapJoinHashMapResult getNonMatchedHashMapResult() { + if (nonMatchedHashMapResult == null) { + nonMatchedHashMapResult = new HashMapResult(nonMatchedIterator.getHashMapResult()); + } + nonMatchedHashMapResult.setJoinResult(JoinResult.MATCH); + return nonMatchedHashMapResult; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringCommon.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringCommon.java index a8ccfa4..da0e836 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringCommon.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringCommon.java @@ -26,19 +26,9 @@ /* * An single byte array value hash map based on the BytesBytesMultiHashMap. 
- * - * Since BytesBytesMultiHashMap does not interpret the key as BinarySortable we optimize - * this case and just reference the byte array key directly for the lookup instead of serializing - * the byte array into BinarySortable. We rely on it just doing byte array equality comparisons. */ public class VectorMapJoinOptimizedStringCommon { - // private boolean isOuterJoin; - - // private BinarySortableDeserializeRead keyBinarySortableDeserializeRead; - - // private ReadStringResults readStringResults; - private BinarySortableSerializeWrite keyBinarySortableSerializeWrite; private transient Output output; @@ -55,18 +45,13 @@ public SerializedBytes serialize(byte[] keyBytes, int keyStart, int keyLength) t serializedBytes.length = output.getLength(); return serializedBytes; - } public VectorMapJoinOptimizedStringCommon(boolean isOuterJoin) { - // this.isOuterJoin = isOuterJoin; - // PrimitiveTypeInfo[] primitiveTypeInfos = { TypeInfoFactory.stringTypeInfo }; - // keyBinarySortableDeserializeRead = new BinarySortableDeserializeRead(primitiveTypeInfos); - // readStringResults = keyBinarySortableDeserializeRead.createReadStringResults(); - // bytesWritable = new BytesWritable(); + keyBinarySortableSerializeWrite = new BinarySortableSerializeWrite(1); output = new Output(); keyBinarySortableSerializeWrite.set(output); serializedBytes = new SerializedBytes(); } -} \ No newline at end of file +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashMap.java index f2074ec..7de0a85 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashMap.java @@ -22,12 +22,19 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; /* - * An multi-key hash map based on the BytesBytesMultiHashMap. + * A string hash map based on the BytesBytesMultiHashMap.
*/ public class VectorMapJoinOptimizedStringHashMap extends VectorMapJoinOptimizedHashMap @@ -35,14 +42,59 @@ private VectorMapJoinOptimizedStringCommon stringCommon; - /* - @Override - public void putRow(BytesWritable currentKey, BytesWritable currentValue) - throws SerDeException, HiveException, IOException { + private static class NonMatchedStringHashMapIterator extends NonMatchedBytesHashMapIterator { + + private BinarySortableDeserializeRead keyBinarySortableDeserializeRead; + + NonMatchedStringHashMapIterator(MatchTracker matchTracker, + VectorMapJoinOptimizedStringHashMap hashMap) { + super(matchTracker, hashMap); + } + + @Override + public void init() { + super.init(); + + TypeInfo[] typeInfos = new TypeInfo[] {TypeInfoFactory.stringTypeInfo}; + keyBinarySortableDeserializeRead = + new BinarySortableDeserializeRead(typeInfos, /* useExternalBuffer */ false); + } + + @Override + public boolean readNonMatchedBytesKey() throws HiveException { + super.doReadNonMatchedBytesKey(); - stringCommon.adaptPutRow((VectorMapJoinOptimizedHashTable) this, currentKey, currentValue); + byte[] bytes = keyRef.getBytes(); + final int keyOffset = (int) keyRef.getOffset(); + final int keyLength = keyRef.getLength(); + try { + keyBinarySortableDeserializeRead.set(bytes, keyOffset, keyLength); + return keyBinarySortableDeserializeRead.readNextField(); + } catch (IOException e) { + throw new HiveException(e); + } + } + + @Override + public byte[] getNonMatchedBytes() { + return keyBinarySortableDeserializeRead.currentBytes; + } + + @Override + public int getNonMatchedBytesOffset() { + return keyBinarySortableDeserializeRead.currentBytesStart; + } + + @Override + public int getNonMatchedBytesLength() { + return keyBinarySortableDeserializeRead.currentBytesLength; + } + } + + @Override + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + return new NonMatchedStringHashMapIterator(matchTracker, this); } - */ @Override public JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, @@ -55,6 +107,31 @@ public JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, } + @Override + public JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, + VectorMapJoinHashMapResult hashMapResult, MatchTracker matchTracker) throws IOException { + + SerializedBytes serializedBytes = stringCommon.serialize(keyBytes, keyStart, keyLength); + + return super.lookup(serializedBytes.bytes, serializedBytes.offset, serializedBytes.length, + hashMapResult, matchTracker); + + } + + /* + * This variation is used by FULL OUTER INTERSECT MapJoin. It does key match tracking for + * intersect purposes but does not return Small Table values. 
+ */ + @Override + public boolean lookupNoResult(byte[] keyBytes, int keyOffset, int keyLength, + WriteBuffers.Position readPos, MatchTracker matchTracker) throws IOException { + + SerializedBytes serializedBytes = stringCommon.serialize(keyBytes, keyOffset, keyLength); + + return doLookupNoResult(serializedBytes.bytes, serializedBytes.offset, serializedBytes.length, + readPos, matchTracker); + } + public VectorMapJoinOptimizedStringHashMap(boolean isOuterJoin, MapJoinTableContainer originalTableContainer, ReusableGetAdaptor hashMapRowGetter) { super(originalTableContainer, hashMapRowGetter); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index a235f3f..3857d76 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -83,7 +83,6 @@ private static final Logger LOG = LoggerFactory.getLogger(ConvertJoinMapJoin.class.getName()); - @Override /* * (non-Javadoc) we should ideally not modify the tree we traverse. However, @@ -119,6 +118,7 @@ boolean hiveConvertJoin = context.conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN) & !context.parseContext.getDisableMapJoin(); if (!hiveConvertJoin) { + // we are just converting to a common merge join operator. The shuffle // join in map-reduce case. Object retval = checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx, maxSize); @@ -182,6 +182,23 @@ // reduced by 1 mapJoinOp.setOpTraits(new OpTraits(null, -1, null, joinOp.getOpTraits().getNumReduceSinks())); mapJoinOp.setStatistics(joinOp.getStatistics()); + + /* + * Replan for FULL OUTER MapJoin to add INTERSECT Reducer if we are doing a Shared-Memory plan. + * + * Fixup the MapJoinDesc to get rid of Filter Maps. + */ + JoinCondDesc[] conds = joinOp.getConf().getConds(); + if (conds.length == 1 && conds[0].getType() == JoinDesc.FULL_OUTER_JOIN) { + MapJoinDesc mapJoinDesc = mapJoinOp.getConf(); + + FullOuterMapJoinOptimization.removeFilterMap(mapJoinDesc); + + if (!mapJoinDesc.isDynamicPartitionHashJoin()) { + FullOuterMapJoinOptimization.generateSharedMemoryPlan(mapJoinOp); + } + } + // propagate this change till the next RS for (Operator childOp : mapJoinOp.getChildOperators()) { setAllChildrenTraits(childOp, mapJoinOp.getOpTraits()); @@ -741,6 +758,8 @@ private boolean isCrossProduct(JoinOperator joinOp) { public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext context, int buckets, boolean skipJoinTypeChecks, long maxSize, boolean checkMapJoinThresholds) throws SemanticException { + JoinDesc joinDesc = joinOp.getConf(); + JoinCondDesc[] conds = joinDesc.getConds(); if (!skipJoinTypeChecks) { /* * HIVE-9038: Join tests fail in tez when we have more than 1 join on the same key and there is @@ -749,14 +768,19 @@ public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext c * new operation to be able to support this. This seems like a corner case enough to special * case this for now. */ - if (joinOp.getConf().getConds().length > 1) { + if (conds.length > 1) { if (hasOuterJoin(joinOp)) { return -1; } } } + + // Is a FULL OUTER JOIN being done? Is planning a FULL OUTER MapJoin enabled? 
+ boolean isEnableFullOuterMapJoin = + MapJoinProcessor.determineEnableFullOuterMapJoin(context.conf, joinOp); + Set bigTableCandidateSet = - MapJoinProcessor.getBigTableCandidates(joinOp.getConf().getConds()); + MapJoinProcessor.getBigTableCandidates(conds, isEnableFullOuterMapJoin); int bigTablePosition = -1; // big input cumulative row count long bigInputCumulativeCardinality = -1L; @@ -937,13 +961,13 @@ public MapJoinOperator convertJoinMapJoin(JoinOperator joinOp, OptimizeTezProcCo MapJoinProcessor.convertJoinOpMapJoinOp(context.conf, joinOp, joinOp.getConf().isLeftInputJoin(), joinOp.getConf().getBaseSrc(), joinOp.getConf().getMapAliases(), bigTablePosition, true, removeReduceSink); - mapJoinOp.getConf().setHybridHashJoin(HiveConf.getBoolVar(context.conf, + MapJoinDesc mapJoinDesc = mapJoinOp.getConf(); + mapJoinDesc.setHybridHashJoin(HiveConf.getBoolVar(context.conf, HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN)); - List joinExprs = mapJoinOp.getConf().getKeys().values().iterator().next(); + List joinExprs = mapJoinDesc.getKeys().values().iterator().next(); if (joinExprs.size() == 0) { // In case of cross join, we disable hybrid grace hash join - mapJoinOp.getConf().setHybridHashJoin(false); + mapJoinDesc.setHybridHashJoin(false); } - Operator parentBigTableOp = mapJoinOp.getParentOperators().get(bigTablePosition); if (parentBigTableOp instanceof ReduceSinkOperator) { @@ -1159,7 +1183,14 @@ private boolean convertJoinDynamicPartitionedHashJoin(JoinOperator joinOp, Optim MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, bigTablePos, false); if (mapJoinOp != null) { LOG.info("Selected dynamic partitioned hash join"); - mapJoinOp.getConf().setDynamicPartitionHashJoin(true); + MapJoinDesc mapJoinDesc = mapJoinOp.getConf(); + mapJoinDesc.setDynamicPartitionHashJoin(true); + JoinCondDesc[] conds = mapJoinDesc.getConds(); + if (conds.length == 1 && conds[0].getType() == JoinDesc.FULL_OUTER_JOIN) { + + // Fixup the MapJoinDesc to get rid of Filter Maps. + FullOuterMapJoinOptimization.removeFilterMap(mapJoinDesc); + } // Set OpTraits for dynamically partitioned hash join: // bucketColNames: Re-use previous joinOp's bucketColNames. Parent operators should be // reduce sink, which should have bucket columns based on the join keys. @@ -1225,8 +1256,9 @@ private boolean checkNumberOfEntriesForHashTable(JoinOperator joinOp, int positi List columnStats = new ArrayList<>(); for (String key : keys) { ColStatistics cs = inputStats.getColumnStatisticsFromColName(key); + LOG.debug("Statistics obtained for {} of reduce sink operator {}: {}", + key, rsOp.toString(), (cs != null)); if (cs == null) { - LOG.debug("Couldn't get statistics for: {}", key); return true; } columnStats.add(cs); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java new file mode 100644 index 0000000..72f523a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java @@ -0,0 +1,446 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.TreeMap; + +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.RowSchema; +import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.exec.UnionOperator; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinBaseOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinBaseOperator.FullOuterMapJoinBigTableInfo; +import org.apache.hadoop.hive.ql.io.AcidUtils.Operation; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.MapJoinDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.PlanUtils; +import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; +import org.apache.hadoop.hive.ql.plan.SelectDesc; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.plan.UnionDesc; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +import com.google.common.base.Preconditions; +import com.google.common.base.Joiner; + +/** + * FULL OUTER MapJoin planning. + */ +public class FullOuterMapJoinOptimization { + + /** + * Add 2nd ReduceSink for sending Small Table to FULL OUTER INTERSECT MapJoin, too. 
+ */ + private static ReduceSinkOperator addIntersectSmallTableReduceSink( + MapJoinOperator mapJoinOp, int posSmallTable) + throws SemanticException { + + List> mapJoinParents = mapJoinOp.getParentOperators(); + Preconditions.checkState(mapJoinParents.get(posSmallTable) instanceof ReduceSinkOperator); + + ReduceSinkOperator smallTableReduceSink = + (ReduceSinkOperator) mapJoinOp.getParentOperators().get(posSmallTable); + + List> smallTableReduceSinkParents = + smallTableReduceSink.getParentOperators(); + Preconditions.checkState(smallTableReduceSinkParents.size() == 1); + + Operator smallTableReduceSinkParent = + smallTableReduceSinkParents.get(0); + + ReduceSinkDesc intersectSmallTableReduceSinkDesc = + (ReduceSinkDesc) smallTableReduceSink.getConf().clone(); + intersectSmallTableReduceSinkDesc.setPartitionCols(new ArrayList()); + intersectSmallTableReduceSinkDesc.setNumReducers(1); + intersectSmallTableReduceSinkDesc.setOutputName("intersect"); + + ReduceSinkOperator intersectSmallTableReduceSink = + (ReduceSinkOperator) OperatorFactory.get( + smallTableReduceSink.getCompilationOpContext(), + intersectSmallTableReduceSinkDesc); + intersectSmallTableReduceSink.setColumnExprMap(new HashMap()); + + // Connect smallTableReduceSinkParent and intersectSmallTableReduceSink. + smallTableReduceSinkParent.getChildOperators().add(intersectSmallTableReduceSink); + intersectSmallTableReduceSink.getParentOperators().add(smallTableReduceSinkParent); + + return intersectSmallTableReduceSink; + } + + /* + * Create FULL OUTER INTERSECT MapJoin. + */ + private static MapJoinOperator createIntersectMapJoin( + ReduceSinkOperator intersectSmallTableReduceSink, + List> intersectMapJoinParents, + MapJoinOperator mapJoinOp, MapJoinDesc mapJoinDesc, int posSmallTable) + throws SemanticException { + + MapJoinDesc intersectMapJoinDesc = new MapJoinDesc(mapJoinDesc); + intersectMapJoinDesc.setStatistics(mapJoinDesc.getStatistics()); + intersectMapJoinDesc.setTagOrder(mapJoinDesc.getTagOrder()); + intersectMapJoinDesc.setNullSafes(mapJoinDesc.getNullSafes()); + intersectMapJoinDesc.setFilterMap(mapJoinDesc.getFilterMap()); + + intersectMapJoinDesc.setResidualFilterExprs(mapJoinDesc.getResidualFilterExprs()); + intersectMapJoinDesc.setColumnExprMap(mapJoinDesc.getColumnExprMap()); + + intersectMapJoinDesc.setFullOuterIntersect(true); + + MapJoinOperator intersectMapJoinOp = + (MapJoinOperator) OperatorFactory.get( + mapJoinOp.getCompilationOpContext(), + intersectMapJoinDesc); + + // Make intersectMapJoin a child of intersectSmallTableReduceSink. + intersectSmallTableReduceSink.getChildOperators().add(intersectMapJoinOp); + + intersectMapJoinParents.set(posSmallTable, intersectSmallTableReduceSink); + + return intersectMapJoinOp; + } + + /* + * Create auxiliary ReduceSink that sends first-time key matches from FULL OUTER MapJoin to + * FULL OUTER INTERSECT MapJoin.
+ */ + private static ReduceSinkOperator addAuxiliaryReduceSink( + MapJoinOperator mapJoinOp, MapJoinDesc mapJoinDesc, + FullOuterMapJoinBigTableInfo fullOuterMapJoinBigTableInfo) + throws SemanticException { + + // Get the MapJoin output column names for the auxiliary reduce sink. + List mapJoinOutputNames = mapJoinOp.getConf().getOutputColumnNames(); + + ArrayList mapJoinSignature = mapJoinOp.getSchema().getSignature(); + + ArrayList auxiliaryReduceSinkKeyExprs = new ArrayList(); + int[] bigTableOutputKeyColumnNums = fullOuterMapJoinBigTableInfo.getOutputKeyColumnNums(); + final int bigTableOutputKeySize = bigTableOutputKeyColumnNums.length; + for (int i = 0; i < bigTableOutputKeySize; i++) { + final int bigTableOutputKeyColumnNum = bigTableOutputKeyColumnNums[i]; + ExprNodeColumnDesc colExpr = + new ExprNodeColumnDesc( + mapJoinSignature.get(bigTableOutputKeyColumnNum).getType(), + mapJoinOutputNames.get(bigTableOutputKeyColumnNum), "", false); + auxiliaryReduceSinkKeyExprs.add(colExpr); + } + + ArrayList auxiliaryReduceSinkValueExprs = new ArrayList(); + List auxiliaryValueOutputColumnNames = new ArrayList(); + int[] bigTableOutputValueColumnNums = fullOuterMapJoinBigTableInfo.getOutputValueColumnNums(); + final int bigTableOutputValueSize = bigTableOutputValueColumnNums.length; + for (int i = 0; i < bigTableOutputValueSize; i++) { + final int bigTableOutputValueColumnNum = bigTableOutputValueColumnNums[i]; + ExprNodeColumnDesc colExpr = + new ExprNodeColumnDesc( + mapJoinSignature.get(bigTableOutputValueColumnNum).getType(), + mapJoinOutputNames.get(bigTableOutputValueColumnNum), "", false); + auxiliaryReduceSinkValueExprs.add(colExpr); + auxiliaryValueOutputColumnNames.add("_col" + i); + } + + ReduceSinkDesc auxiliaryReduceSinkDesc = + PlanUtils.getReduceSinkDesc( + auxiliaryReduceSinkKeyExprs, + auxiliaryReduceSinkValueExprs, + auxiliaryValueOutputColumnNames, + false, -1, 0, 1, Operation.NOT_ACID); + auxiliaryReduceSinkDesc.setPartitionCols(new ArrayList()); + auxiliaryReduceSinkDesc.setNumReducers(1); + auxiliaryReduceSinkDesc.setOutputName("auxiliaryIntersect"); + + ReduceSinkOperator auxiliaryReduceSink = + (ReduceSinkOperator) OperatorFactory.getAndMakeChild( + auxiliaryReduceSinkDesc, + new RowSchema(mapJoinOp.getSchema()), + mapJoinOp); + auxiliaryReduceSink.setColumnExprMap(new HashMap()); + + return auxiliaryReduceSink; + } + + /* + * Add a SELECT as the root of the FULL OUTER INTERSECT Reducer to rename the Reduce-Shuffle + * column names to the ones needed by FULL OUTER INTERSECT MapJoin. + */ + private static SelectOperator addRenameSelect( + ReduceSinkOperator auxiliaryReduceSink, + FullOuterMapJoinBigTableInfo fullOuterMapJoinBigTableInfo) + throws SemanticException { + + ReduceSinkDesc auxiliaryReduceSinkDesc = auxiliaryReduceSink.getConf(); + + // A rename SELECT that maps column names... + Map renameSelectColNameToExprMap = new HashMap(); + + // Order these maps by input column number. + Map renameSelectColNumToExprMap = new TreeMap(); + Map renameSelectColNumToOutputNameMap = new TreeMap(); + + /* + * Keys.
+ */ + ArrayList auxiliarReduceSinkKeyCols = auxiliaryReduceSinkDesc.getKeyCols(); + List auxiliarReduceSinkOutputKeyColumnNames = + auxiliaryReduceSinkDesc.getOutputKeyColumnNames(); + int[] bigTableInputKeyColumnMap = fullOuterMapJoinBigTableInfo.getInputKeyColumnMap(); + String[] bigTableInputKeyColumnNames = fullOuterMapJoinBigTableInfo.getInputKeyColumnNames(); + final int renameKeySize = auxiliarReduceSinkKeyCols.size(); + int columnNum = 0; + String keyPrefix = Utilities.ReduceField.KEY.name() + "."; + for (int i = 0; i < renameKeySize; i++) { + String inputColumnName = keyPrefix + auxiliarReduceSinkOutputKeyColumnNames.get(i); + ExprNodeColumnDesc keyColExpr = (ExprNodeColumnDesc) auxiliarReduceSinkKeyCols.get(i); + ExprNodeColumnDesc replaceColExpr = + new ExprNodeColumnDesc( + keyColExpr.getTypeInfo(), + inputColumnName, "", false); + renameSelectColNameToExprMap.put(inputColumnName, replaceColExpr); + + final int inputColumnNum = bigTableInputKeyColumnMap[i]; + renameSelectColNumToExprMap.put(inputColumnNum, replaceColExpr); + final String outputColumnName = bigTableInputKeyColumnNames[i]; + renameSelectColNumToOutputNameMap.put(inputColumnNum, outputColumnName); + } + + /* + * Values. + */ + ArrayList auxiliarReduceSinkValueCols = auxiliaryReduceSinkDesc.getValueCols(); + List auxiliarReduceSinkOutputValueColumnNames = + auxiliaryReduceSinkDesc.getOutputValueColumnNames(); + int[] bigTableInputValueColumnMap = fullOuterMapJoinBigTableInfo.getInputValueColumnMap(); + String[] bigTableInputValueColumnNames = fullOuterMapJoinBigTableInfo.getInputValueColumnNames(); + final int renameValueSize = auxiliarReduceSinkValueCols.size(); + String valuePrefix = Utilities.ReduceField.VALUE.name() + "."; + for (int i = 0; i < renameValueSize; i++) { + String inputColumnName = valuePrefix + auxiliarReduceSinkOutputValueColumnNames.get(i); + ExprNodeColumnDesc valueColExpr = (ExprNodeColumnDesc) auxiliarReduceSinkValueCols.get(i); + ExprNodeColumnDesc replaceColExpr = + new ExprNodeColumnDesc( + valueColExpr.getTypeInfo(), + inputColumnName, "", false); + renameSelectColNameToExprMap.put(inputColumnName, replaceColExpr); + + final int inputColumnNum = bigTableInputValueColumnMap[i]; + renameSelectColNumToExprMap.put(inputColumnNum, replaceColExpr); + final String outputColumnName = bigTableInputValueColumnNames[i]; + renameSelectColNumToOutputNameMap.put(inputColumnNum, outputColumnName); + } + + List renameSelectColExprs = new ArrayList(); + renameSelectColExprs.addAll(renameSelectColNumToExprMap.values()); + + List renameSelectOutputColumnNames = new ArrayList(); + renameSelectOutputColumnNames.addAll(renameSelectColNumToOutputNameMap.values()); + + ArrayList renameSelectColumnInfo = new ArrayList(); + + final int renameSelectSize = renameSelectColExprs.size(); + for (int i = 0; i < renameSelectSize; i++) { + String outputColumnName = renameSelectOutputColumnNames.get(i); + ColumnInfo colInfo = + new ColumnInfo( + outputColumnName, + renameSelectColExprs.get(i).getTypeInfo(), + "", false); + renameSelectColumnInfo.add(colInfo); + } + + SelectDesc renameSelectDesc = + new SelectDesc( + renameSelectColExprs, + renameSelectOutputColumnNames); + + SelectOperator renameSelect = + (SelectOperator) OperatorFactory.get( + auxiliaryReduceSink.getCompilationOpContext(), + renameSelectDesc); + renameSelect.setSchema(new RowSchema(renameSelectColumnInfo)); + renameSelect.setColumnExprMap(renameSelectColNameToExprMap); + + return renameSelect; + } + + /* + * See the "JOIN to MAPJOIN Transformation" and 
"SHARED-MEMORY FULL OUTER MapJoin" pictures in + * HIVE-18908: "Add support for FULL OUTER JOIN to MapJoin" for a visual idea of the planning + * added for the FULL OUTER INTERSECT Reducer. + */ + public static void generateSharedMemoryPlan(MapJoinOperator mapJoinOp) + throws SemanticException { + + MapJoinDesc mapJoinDesc = mapJoinOp.getConf(); + + int posBigTable = mapJoinDesc.getPosBigTable(); + int posSmallTable = (posBigTable == 0 ? 1 : 0); + + /* + * Add 2nd ReduceSink for sending Small Table to FULL OUTER INTERSECT MapJoin, too. + */ + ReduceSinkOperator intersectSmallTableReduceSink = + addIntersectSmallTableReduceSink(mapJoinOp, posSmallTable); + + /* + * Create FULL OUTER INTERSECT MapJoin. + */ + + // Get ready to set the FULL OUTER INTERCEPT MapJoin parents. + List> intersectMapJoinParents = + new ArrayList>(); + intersectMapJoinParents.add(null); + intersectMapJoinParents.add(null); + + MapJoinOperator intersectMapJoinOp = + createIntersectMapJoin( + intersectSmallTableReduceSink, + intersectMapJoinParents, + mapJoinOp, mapJoinDesc, posSmallTable); + + /* + * Create auxiliary ReduceSink that sends first-time key matches from FULL OUTER MapJoin to + * FULL OUTER INTERSECT MapJoin. + */ + FullOuterMapJoinBigTableInfo fullOuterMapJoinBigTableInfo = + VectorMapJoinBaseOperator.getFullOuterMapJoinBigTableInfo(mapJoinDesc); + + ReduceSinkOperator auxiliaryReduceSink = + addAuxiliaryReduceSink(mapJoinOp, mapJoinDesc, fullOuterMapJoinBigTableInfo); + + ReduceSinkDesc auxiliaryReduceSinkDesc = auxiliaryReduceSink.getConf(); + + /* + * Add a SELECT as the root of the FULL OUTER INTERSECT Reducer to rename the Reduce-Shuffle + * column names to the ones needed by FULL OUTER INTERSECT MapJoin. + */ + SelectOperator renameSelect = + addRenameSelect( + auxiliaryReduceSink, + fullOuterMapJoinBigTableInfo); + + /* + * Do current new operators connecting. + */ + auxiliaryReduceSink.getChildOperators().add(renameSelect); + renameSelect.getParentOperators().add(auxiliaryReduceSink); + + renameSelect.getChildOperators().add(intersectMapJoinOp); + + intersectMapJoinParents.set(posBigTable, renameSelect); + intersectMapJoinOp.setParentOperators(intersectMapJoinParents); + + /* + * Put the special UNION operator in to combine the output of FULL OUTER MapJoin and + * FULL OUTER INTERSECT MapJoin operators. + */ + + // Detatch child below MapJoin. 
+ Operator mapJoinChild = mapJoinOp.getChildOperators().get(0); + mapJoinOp.setChildOperators(new ArrayList>()); + mapJoinChild.setParentOperators(new ArrayList>()); + + ArrayList> unionParents = + new ArrayList>(); + unionParents.add(mapJoinOp); + unionParents.add(intersectMapJoinOp); + + UnionOperator unionOp = + (UnionOperator) OperatorFactory.getAndMakeChild( + auxiliaryReduceSink.getCompilationOpContext(), + new UnionDesc(), + new RowSchema(mapJoinOp.getSchema().getSignature()), + unionParents); + + unionOp.getChildOperators().add(mapJoinChild); + mapJoinChild.getParentOperators().add(unionOp); + + mapJoinOp.getChildOperators().add(auxiliaryReduceSink); + } + + public static void removeFilterMap(MapJoinDesc mapJoinDesc) throws SemanticException { + int[][] filterMaps = mapJoinDesc.getFilterMap(); + if (filterMaps == null) { + return; + } + final byte posBigTable = (byte) mapJoinDesc.getPosBigTable(); + final int numAliases = mapJoinDesc.getExprs().size(); + List valueFilteredTblDescs = mapJoinDesc.getValueFilteredTblDescs(); + for (byte pos = 0; pos < numAliases; pos++) { + if (pos != posBigTable) { + int[] filterMap = filterMaps[pos]; + TableDesc tableDesc = valueFilteredTblDescs.get(pos); + Properties properties = tableDesc.getProperties(); + String columnNameProperty = properties.getProperty(serdeConstants.LIST_COLUMNS); + String columnNameDelimiter = + properties.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? + properties.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : + String.valueOf(SerDeUtils.COMMA); + + String columnTypeProperty = properties.getProperty(serdeConstants.LIST_COLUMN_TYPES); + List columnNameList; + if (columnNameProperty.length() == 0) { + columnNameList = new ArrayList(); + } else { + columnNameList = Arrays.asList(columnNameProperty.split(columnNameDelimiter)); + } + List truncatedColumnNameList = columnNameList.subList(0, columnNameList.size() - 1); + String truncatedColumnNameProperty = + Joiner.on(columnNameDelimiter).join(truncatedColumnNameList); + + List columnTypeList; + if (columnTypeProperty.length() == 0) { + columnTypeList = new ArrayList(); + } else { + columnTypeList = TypeInfoUtils + .getTypeInfosFromTypeString(columnTypeProperty); + } + if (!columnTypeList.get(columnTypeList.size() - 1).equals(TypeInfoFactory.shortTypeInfo)) { + throw new SemanticException("Expecting filterTag smallint as last column type"); + } + List truncatedColumnTypeList = + columnTypeList.subList(0, columnTypeList.size() - 1); + String truncatedColumnTypeProperty = + Joiner.on(",").join(truncatedColumnTypeList); + + properties.setProperty(serdeConstants.LIST_COLUMNS, truncatedColumnNameProperty); + properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, truncatedColumnTypeProperty); + } + } + mapJoinDesc.setFilterMap(null); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java index 275a31f..bb5ed33 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java @@ -48,6 +48,9 @@ import org.apache.hadoop.hive.ql.exec.ScriptOperator; import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.UnionOperator; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey; +import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinBaseOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinBaseOperator.FullOuterMapJoinBigTableInfo; 
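(To make the serde-property truncation in removeFilterMap above concrete, here is a simplified, self-contained sketch. It is not part of the patch: it assumes primitive column types and the default comma delimiter, whereas the method itself parses the type string with TypeInfoUtils so complex types are handled correctly.)

import java.util.Properties;
import org.apache.hadoop.hive.serde.serdeConstants;

// Illustrative sketch of dropping the trailing filterTag column (a smallint)
// from a Small Table value TableDesc's serde properties, as removeFilterMap
// above does for each Small Table position.
public class FilterTagTruncationSketch {
  public static void main(String[] args) {
    Properties properties = new Properties();
    properties.setProperty(serdeConstants.LIST_COLUMNS, "_col0,_col1,_filtered");
    properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "bigint,string,smallint");

    String columns = properties.getProperty(serdeConstants.LIST_COLUMNS);
    String types = properties.getProperty(serdeConstants.LIST_COLUMN_TYPES);

    // Drop the trailing filterTag column name and its smallint type.
    properties.setProperty(serdeConstants.LIST_COLUMNS,
        columns.substring(0, columns.lastIndexOf(',')));
    properties.setProperty(serdeConstants.LIST_COLUMN_TYPES,
        types.substring(0, types.lastIndexOf(',')));

    System.out.println(properties.getProperty(serdeConstants.LIST_COLUMNS));      // _col0,_col1
    System.out.println(properties.getProperty(serdeConstants.LIST_COLUMN_TYPES)); // bigint,string
  }
}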
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; import org.apache.hadoop.hive.ql.lib.GraphWalker; @@ -75,6 +78,7 @@ import org.apache.hadoop.hive.ql.plan.SelectDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -356,6 +360,211 @@ public MapJoinOperator convertMapJoin(HiveConf conf, return mapJoinOp; } + public static boolean onExpressionHasNullSafes(MapJoinDesc desc) { + boolean[] nullSafes = desc.getNullSafes(); + if (nullSafes == null) { + return false; + } + for (boolean nullSafe : nullSafes) { + if (nullSafe) { + return true; + } + } + return false; + } + + private static boolean checkFullOuterMapJoinKeysOneSide(HiveConf hiveConf, + JoinOperator joinOp, boolean isLeftSide) throws SemanticException { + + JoinDesc joinDesc = joinOp.getConf(); + byte mapJoinPos = (byte) (isLeftSide ? 0 : 1); + MapJoinDesc mapJoinDesc = + getMapJoinDesc(hiveConf, joinOp, isLeftSide, joinDesc.getBaseSrc(), + joinDesc.getMapAliases(), mapJoinPos, + /* noCheckOuterJoin */ true, /* adjustParentsChildren */ false); + + FullOuterMapJoinBigTableInfo bigTableInfo = + VectorMapJoinBaseOperator.getFullOuterMapJoinBigTableInfo(mapJoinDesc); + if (bigTableInfo == null) { + + if (LOG.isDebugEnabled()) { + LOG.debug("FULL OUTER MapJoin not enabled: " + + " Not all Big Table keys are columns or " + + " not all Big Table keys appear in the output result"); + } + return false; + } + + if (onExpressionHasNullSafes(mapJoinDesc)) { + if (LOG.isDebugEnabled()) { + LOG.debug("FULL OUTER MapJoin not enabled: " + + "nullsafe not supported"); + } + return false; + } + + if (mapJoinDesc.getResidualFilterExprs() != null && + mapJoinDesc.getResidualFilterExprs().size() != 0) { + if (LOG.isDebugEnabled()) { + LOG.debug("FULL OUTER MapJoin not enabled: " + + "non-equi joins not supported"); + } + return false; + } + + return true; + } + + private static boolean checkFullOuterMapJoinKeys(HiveConf hiveConf, + JoinOperator joinOp) throws SemanticException { + JoinDesc joinDesc = joinOp.getConf(); + + // We don't know which will be the Big and Small table, so generate temporary MapJoinDesc + // for both... + if (!checkFullOuterMapJoinKeysOneSide(hiveConf, joinOp, /* isLeftSide */ true)) { + return false; + } + if (!checkFullOuterMapJoinKeysOneSide(hiveConf, joinOp, /* isLeftSide */ false)) { + return false; + } + + Byte[] order = joinDesc.getTagOrder(); + ExprNodeDesc[][] joinKeysArray = joinDesc.getJoinKeys(); + for (int i = 0; i < order.length; i++) { + byte pos = order[i]; + ExprNodeDesc[] keyExprs = joinKeysArray[pos]; + for (ExprNodeDesc keyExpr : keyExprs) { + TypeInfo typeInfo = keyExpr.getTypeInfo(); + + // Verify we handle the key column types for an optimized table. This is effectively + // the same check used in Tez HashTableLoader. + if (!MapJoinKey.isSupportedField(typeInfo)) { + if (LOG.isDebugEnabled()) { + LOG.debug("FULL OUTER MapJoin not enabled: " + + " key type " + typeInfo.toString() + " not supported"); + } + return false; + } + } + } + return true; + } + + public static boolean determineEnableFullOuterMapJoin(HiveConf hiveConf, JoinOperator joinOp) + throws SemanticException { + JoinDesc joinDesc = joinOp.getConf(); + JoinCondDesc[] conds = joinDesc.getConds(); + + /* + * Are we even being asked to do a FULL OUTER JOIN?
+ */ + boolean hasFullOuterJoin = false; + for (JoinCondDesc cond : conds) { + if (cond.getType() == JoinDesc.FULL_OUTER_JOIN) { + hasFullOuterJoin = true; + break; + } + } + if (!hasFullOuterJoin) { + return false; + } + + /* + * Check our FULL OUTER MapJoin restrictions. + */ + + final boolean isEnabled = + HiveConf.getBoolVar( + hiveConf, + HiveConf.ConfVars.HIVEMAPJOINFULLOUER); + if (!isEnabled) { + if (LOG.isDebugEnabled()) { + LOG.debug("FULL OUTER MapJoin not enabled: " + + HiveConf.ConfVars.HIVEMAPJOINFULLOUER.varname + " is false"); + } + return false; + } + + if (conds.length > 1) { + + // No multiple condition FULL OUTER MapJoin. + if (LOG.isDebugEnabled()) { + LOG.debug("FULL OUTER MapJoin not enabled: multiple JOIN conditions not supported"); + } + return false; + } + + final boolean isMapReduceEngine = + HiveConf.getVar( + hiveConf, + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr"); + if (isMapReduceEngine) { + + // Only Tez, Spark, etc. + if (LOG.isDebugEnabled()) { + LOG.debug("FULL OUTER MapJoin not enabled: Map-Reduce engine not supported"); + } + return false; + } + + /* + * Optimized Hash Table (i.e. not old-style MR HashMap). + */ + final boolean isOptimizedHashTableEnabled = + HiveConf.getBoolVar( + hiveConf, + HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE); + if (!isOptimizedHashTableEnabled) { + if (LOG.isDebugEnabled()) { + LOG.debug("FULL OUTER MapJoin not enabled: " + + HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE.varname + " is false"); + } + return false; + } + + boolean isKeysCompatibleForFullOuterMapJoin = checkFullOuterMapJoinKeys(hiveConf, joinOp); + if (!isKeysCompatibleForFullOuterMapJoin) { + return false; + } + + final boolean isHybridGraceFullOuterMapJoinEnabled = + HiveConf.getBoolVar( + hiveConf, + HiveConf.ConfVars.HIVEMAPJOINFULLOUERHYBRIDGRACE); + final boolean isHybridGraceEnabled = + HiveConf.getBoolVar( + hiveConf, + HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN); + if (!isHybridGraceFullOuterMapJoinEnabled && isHybridGraceEnabled) { + if (LOG.isDebugEnabled()) { + LOG.debug("FULL OUTER MapJoin not enabled: " + + HiveConf.ConfVars.HIVEMAPJOINFULLOUERHYBRIDGRACE.varname + " is false, and " + + HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN.varname + " is true"); + } + return false; + } + + final boolean isTestFullOuterMapJoinOverride = + HiveConf.getBoolVar( + hiveConf, + HiveConf.ConfVars.HIVE_TEST_MAPJOINFULLOUER_OVERRIDE); + if (isTestFullOuterMapJoinOverride) { + + // Ignore the HIVEMAPJOINFULLOUER setting. 
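+ // (When this test-only override is true, the method reports not-enabled even
+ // though every gate above passed; presumably this lets q-tests force the
+ // row-mode FULL OUTER join path for the same queries.)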
+ if (LOG.isDebugEnabled()) { + LOG.debug("FULL OUTER MapJoin not enabled: " + + HiveConf.ConfVars.HIVE_TEST_MAPJOINFULLOUER_OVERRIDE.varname + " is true -- " + + "overriding enable conditions"); + } + return false; + } + + if (LOG.isDebugEnabled()) { + LOG.debug("FULL OUTER MapJoin enabled"); + } + return true; + } + public static MapJoinOperator convertJoinOpMapJoinOp(HiveConf hconf, JoinOperator op, boolean leftInputJoin, String[] baseSrc, List mapAliases, int mapJoinPos, boolean noCheckOuterJoin) throws SemanticException { @@ -502,6 +711,11 @@ public MapJoinOperator generateMapJoinOperator(ParseContext pctx, JoinOperator o * @return set of big table candidates */ public static Set getBigTableCandidates(JoinCondDesc[] condns) { + return getBigTableCandidates(condns, false); + } + + public static Set getBigTableCandidates(JoinCondDesc[] condns, + boolean isEnableFullOuterJoin) { Set bigTableCandidates = new HashSet(); boolean seenOuterJoin = false; @@ -516,14 +730,18 @@ public MapJoinOperator generateMapJoinOperator(ParseContext pctx, JoinOperator o seenPostitions.add(condn.getRight()); if (joinType == JoinDesc.FULL_OUTER_JOIN) { - // setting these 2 parameters here just in case that if the code got - // changed in future, these 2 are not missing. + // UNDONE: For now, pretend it is a LEFT OUTER JOIN... seenOuterJoin = true; lastSeenRightOuterJoin = false; - // empty set - cannot convert - return new HashSet(); - } else if (joinType == JoinDesc.LEFT_OUTER_JOIN - || joinType == JoinDesc.LEFT_SEMI_JOIN) { + if (!isEnableFullOuterJoin) { + // Empty set - cannot convert + return new HashSet(); + } + if(bigTableCandidates.size() == 0) { + bigTableCandidates.add(condn.getLeft()); + } + } else if (joinType == JoinDesc.LEFT_OUTER_JOIN || + joinType == JoinDesc.LEFT_SEMI_JOIN) { seenOuterJoin = true; if(bigTableCandidates.size() == 0) { bigTableCandidates.add(condn.getLeft()); @@ -1044,6 +1262,7 @@ public static MapJoinDesc getMapJoinDesc(HiveConf hconf, JoinCondDesc[] condns = desc.getConds(); Byte[] tagOrder = desc.getTagOrder(); + // UNDONE: Fix this comment...
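The getBigTableCandidates() overload above is the planner-side crux: a lone FULL OUTER condition now yields the left position as the only Big Table candidate, mirroring the LEFT OUTER JOIN rule, instead of the empty cannot-convert set. A behavior sketch, with generics restored for readability and the enclosing class name (MapJoinProcessor) assumed from context:

    JoinCondDesc cond = new JoinCondDesc(0, 1, JoinDesc.FULL_OUTER_JOIN);
    JoinCondDesc[] conds = new JoinCondDesc[] { cond };

    // Legacy overload: FULL OUTER JOIN can never be converted.
    Set<Integer> legacy = MapJoinProcessor.getBigTableCandidates(conds);
    assert legacy.isEmpty();

    // New overload with isEnableFullOuterJoin == true: only the left side
    // (position 0) may stream as the Big Table.
    Set<Integer> fullOuter = MapJoinProcessor.getBigTableCandidates(conds, true);
    assert fullOuter.equals(Collections.singleton(0));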
// outer join cannot be performed on a table which is being cached if (!noCheckOuterJoin) { if (checkMapJoin(mapJoinPos, condns) < 0) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 13a2fc4..44d772d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -67,6 +67,12 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterLongOperator; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterMultiKeyOperator; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinFullOuterLongOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinFullOuterMultiKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinFullOuterStringOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinFullOuterIntersectLongOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinFullOuterIntersectMultiKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinFullOuterIntersectStringOperator; import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator; import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkEmptyKeyOperator; import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkLongOperator; @@ -121,6 +127,7 @@ import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredWork; +import org.apache.hadoop.hive.ql.plan.MergeJoinWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PTFDesc; import org.apache.hadoop.hive.ql.plan.SelectDesc; @@ -709,10 +716,63 @@ public VectorDesc getVectorDesc() { } } - private List> newOperatorList() { + private static List> newOperatorList() { return new ArrayList>(); } + public static void debugDisplayJoinOperatorTree(Operator joinOperator, + String prefix) { + List> currentParentList = newOperatorList(); + currentParentList.add(joinOperator); + + int depth = 0; + do { + List> nextParentList = newOperatorList(); + + final int count = currentParentList.size(); + for (int i = 0; i < count; i++) { + Operator parent = currentParentList.get(i); + System.out.println(prefix + " parent depth " + depth + " " + + parent.getClass().getSimpleName() + " " + parent.toString()); + + List> parentList = parent.getParentOperators(); + if (parentList == null || parentList.size() == 0) { + continue; + } + + nextParentList.addAll(parentList); + } + + currentParentList = nextParentList; + depth--; + } while (currentParentList.size() > 0); + + List> currentChildList = newOperatorList(); + currentChildList.addAll(joinOperator.getChildOperators()); + + depth = 1; + do { + List> nextChildList = newOperatorList(); + + final int count = currentChildList.size(); + for (int i = 0; i < count; i++) { + Operator child = currentChildList.get(i); + System.out.println(prefix + " child depth " + depth + " " + + child.getClass().getSimpleName() + " " + child.toString()); + + List> childList = child.getChildOperators(); + if (childList == null || childList.size() == 0) { + continue; + } + + nextChildList.addAll(childList); + } + + currentChildList = nextChildList; + depth++; + } while (currentChildList.size() > 0);
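+
+ // Example (hypothetical call site): this helper is intended for ad-hoc use while
+ // debugging a join conversion, e.g.
+ //   debugDisplayJoinOperatorTree(mapJoinOp, "convertJoinMapJoin");
+ // It prints the operator itself and its ancestors at depths 0, -1, -2, ... and
+ // its descendants at depths 1, 2, 3, ...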
+ } + private Operator validateAndVectorizeOperatorTree( Operator nonVecRootOperator, boolean isReduce, boolean isTezOrSpark, @@ -946,6 +1006,11 @@ public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) if (isReduceVectorizationEnabled) { convertReduceWork(reduceWork); } + } else if (baseWork instanceof MergeJoinWork){ + MergeJoinWork mergeJoinWork = (MergeJoinWork) baseWork; + + // MergeJoinExplainVectorization will say vectorization not supported. + mergeJoinWork.setVectorizationExamined(true); } } } else if (currTask instanceof SparkTask) { @@ -2240,7 +2305,7 @@ private boolean validateMapJoinDesc(MapJoinDesc desc) { return false; } if (desc.getResidualFilterExprs() != null && !desc.getResidualFilterExprs().isEmpty()) { - LOG.info("Cannot vectorize join with complex ON clause"); + setOperatorIssue("Non-equi joins not supported"); return false; } return true; @@ -2928,7 +2993,7 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { HashTableImplementationType hashTableImplementationType = HashTableImplementationType.NONE; HashTableKind hashTableKind = HashTableKind.NONE; HashTableKeyType hashTableKeyType = HashTableKeyType.NONE; - VectorMapJoinVariation vectorMapJoinVariation = VectorMapJoinVariation.NONE; + VectorMapJoinVariation vectorMapJoinVariation = null; if (vectorDesc.getIsFastHashTableEnabled()) { hashTableImplementationType = HashTableImplementationType.FAST; @@ -2998,6 +3063,10 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { vectorMapJoinVariation = VectorMapJoinVariation.OUTER; hashTableKind = HashTableKind.HASH_MAP; break; + case JoinDesc.FULL_OUTER_JOIN: + vectorMapJoinVariation = VectorMapJoinVariation.FULL_OUTER; + hashTableKind = HashTableKind.HASH_MAP; + break; case JoinDesc.LEFT_SEMI_JOIN: vectorMapJoinVariation = VectorMapJoinVariation.LEFT_SEMI; hashTableKind = HashTableKind.HASH_SET; @@ -3027,6 +3096,13 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { case OUTER: opClass = VectorMapJoinOuterLongOperator.class; break; + case FULL_OUTER: + if (desc.isFullOuterIntersect()) { + opClass = VectorMapJoinFullOuterIntersectLongOperator.class; + } else { + opClass = VectorMapJoinFullOuterLongOperator.class; + } + break; default: throw new HiveException("Unknown operator variation " + vectorMapJoinVariation); } @@ -3045,6 +3121,13 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { case OUTER: opClass = VectorMapJoinOuterStringOperator.class; break; + case FULL_OUTER: + if (desc.isFullOuterIntersect()) { + opClass = VectorMapJoinFullOuterIntersectStringOperator.class; + } else { + opClass = VectorMapJoinFullOuterStringOperator.class; + } + break; default: throw new HiveException("Unknown operator variation " + vectorMapJoinVariation); } @@ -3063,6 +3146,13 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { case OUTER: opClass = VectorMapJoinOuterMultiKeyOperator.class; break; + case FULL_OUTER: + if (desc.isFullOuterIntersect()) { + opClass = VectorMapJoinFullOuterIntersectMultiKeyOperator.class; + } else { + opClass = VectorMapJoinFullOuterMultiKeyOperator.class; + } + break; default: throw new HiveException("Unknown operator variation " + vectorMapJoinVariation); } @@ -3078,6 +3168,11 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { vectorDesc.setHashTableKind(hashTableKind); vectorDesc.setHashTableKeyType(hashTableKeyType); vectorDesc.setVectorMapJoinVariation(vectorMapJoinVariation); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { + + // UNDONE: Not needed for SHARED-MEMORY 
Non-INTERSECT. + vectorDesc.setIsSaveNullKeyValuesForFullOuter(true); + } vectorDesc.setMinMaxEnabled(minMaxEnabled); vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo); @@ -3190,6 +3285,8 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi /* * Similarly, we need a mapping since a value expression can be a calculation and the value * will go into a scratch column. + * + * Value expressions include keys? YES. */ int[] bigTableValueColumnMap = new int[allBigTableValueExpressions.length]; String[] bigTableValueColumnNames = new String[allBigTableValueExpressions.length]; @@ -3229,18 +3326,24 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi vectorDesc.setAllBigTableValueExpressions(allBigTableValueExpressions); /* - * Small table information. + * Column mapping. */ - VectorColumnOutputMapping bigTableRetainedMapping = - new VectorColumnOutputMapping("Big Table Retained Mapping"); + VectorColumnOutputMapping bigTableRetainMapping = + new VectorColumnOutputMapping("Big Table Retain Mapping"); + + VectorColumnOutputMapping nonOuterSmallTableKeyMapping = + new VectorColumnOutputMapping("Non Outer Small Table Key Mapping"); + + VectorColumnOutputMapping outerSmallTableKeyMapping = + new VectorColumnOutputMapping("Outer Small Table Key Mapping"); - VectorColumnOutputMapping bigTableOuterKeyMapping = - new VectorColumnOutputMapping("Big Table Outer Key Mapping"); + VectorColumnSourceMapping fullOuterSmallTableKeyMapping = + new VectorColumnSourceMapping("Full Outer Small Table Key Mapping"); // The order of the fields in the LazyBinary small table value must be used, so // we use the source ordering flavor for the mapping. - VectorColumnSourceMapping smallTableMapping = - new VectorColumnSourceMapping("Small Table Mapping"); + VectorColumnSourceMapping smallTableValueMapping = + new VectorColumnSourceMapping("Small Table Value Mapping"); Byte[] order = desc.getTagOrder(); Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]); @@ -3250,7 +3353,6 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi * Gather up big and small table output result information from the MapJoinDesc. */ List bigTableRetainList = desc.getRetainList().get(posBigTable); - int bigTableRetainSize = bigTableRetainList.size(); int[] smallTableIndices; int smallTableIndicesSize; @@ -3287,6 +3389,8 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi VectorColumnSourceMapping projectionMapping = new VectorColumnSourceMapping("Projection Mapping"); int nextOutputColumn = (order[0] == posBigTable ? 0 : smallTableResultSize); + + final int bigTableRetainSize = bigTableRetainList.size(); for (int i = 0; i < bigTableRetainSize; i++) { // Since bigTableValueExpressions may do a calculation and produce a scratch column, we @@ -3300,9 +3404,10 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi projectionMapping.add(nextOutputColumn, batchColumnIndex, typeInfo); // Collect columns we copy from the big table batch to the overflow batch. - if (!bigTableRetainedMapping.containsOutputColumn(batchColumnIndex)) { + if (!bigTableRetainMapping.containsOutputColumn(batchColumnIndex)) { + // Tolerate repeated use of a big table column.
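+ // (The mapping records batch column -> overflow batch column; for retained Big
+ // Table columns the two indices are identical, as the add() call below shows,
+ // since the overflow batch evidently mirrors the big table batch layout.)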
- bigTableRetainedMapping.add(batchColumnIndex, batchColumnIndex, typeInfo); + bigTableRetainMapping.add(batchColumnIndex, batchColumnIndex, typeInfo); } nextOutputColumn++; @@ -3319,10 +3424,8 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi nextOutputColumn = firstSmallTableOutputColumn; // Small table indices has more information (i.e. keys) than retain, so use it if it exists... - String[] bigTableRetainedNames; if (smallTableIndicesSize > 0) { smallTableOutputCount = smallTableIndicesSize; - bigTableRetainedNames = new String[smallTableOutputCount]; for (int i = 0; i < smallTableIndicesSize; i++) { if (smallTableIndices[i] >= 0) { @@ -3334,34 +3437,39 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi // Since bigTableKeyExpressions may do a calculation and produce a scratch column, we // need to map the right column. - int batchKeyColumn = bigTableKeyColumnMap[keyIndex]; - bigTableRetainedNames[i] = bigTableKeyColumnNames[keyIndex]; + int bigTableKeyColumn = bigTableKeyColumnMap[keyIndex]; TypeInfo typeInfo = bigTableKeyTypeInfos[keyIndex]; if (!isOuterJoin) { // Optimize inner join keys of small table results. + // UNDONE: The columns seem backwards here... // Project the big table key into the small table result "area". - projectionMapping.add(nextOutputColumn, batchKeyColumn, typeInfo); + projectionMapping.add(nextOutputColumn, bigTableKeyColumn, typeInfo); + + if (!bigTableRetainMapping.containsOutputColumn(bigTableKeyColumn)) { - if (!bigTableRetainedMapping.containsOutputColumn(batchKeyColumn)) { - // If necessary, copy the big table key into the overflow batch's small table - // result "area". - bigTableRetainedMapping.add(batchKeyColumn, batchKeyColumn, typeInfo); + // When the Big Table key is not retained in the output result, we do need to copy the + // Big Table key into the overflow batch so the projection of it (Big Table key) to + // the Small Table key will work properly... + // + nonOuterSmallTableKeyMapping.add(bigTableKeyColumn, bigTableKeyColumn, typeInfo); } } else { - // For outer joins, since the small table key can be null when there is no match, + // For outer joins, since the small table key can be null for NOMATCH, + // we must have a physical (scratch) column for those keys. We cannot use the - // projection optimization used by inner joins above. + // projection optimization used by non-[FULL] OUTER joins above. int scratchColumn = vContext.allocateScratchColumn(typeInfo); projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo); - bigTableRetainedMapping.add(batchKeyColumn, scratchColumn, typeInfo); + outerSmallTableKeyMapping.add(bigTableKeyColumn, scratchColumn, typeInfo); - bigTableOuterKeyMapping.add(batchKeyColumn, scratchColumn, typeInfo); + // For FULL OUTER MapJoin, we need to be able to deserialize a Small Table key + // into the output result. + fullOuterSmallTableKeyMapping.add(keyIndex, scratchColumn, typeInfo); } } else { @@ -3375,21 +3483,18 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi smallTableExprVectorizes = false; } - bigTableRetainedNames[i] = smallTableExprNode.toString(); - TypeInfo typeInfo = smallTableExprNode.getTypeInfo(); // Make a new big table scratch column for the small table value.
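+ // (Worked illustration with a hypothetical query and column numbers: for
+ //   SELECT b.key, b.val, s.key, s.val FROM big b FULL OUTER JOIN small s ON b.key = s.key
+ // with b.key and b.val in batch columns 0 and 1, and scratch columns k and v allocated
+ // for the Small Table side, the mappings built here would be roughly:
+ //   projectionMapping:             0->0, 1->1, 2->k, 3->v
+ //   outerSmallTableKeyMapping:     0->k  (copy the matched key; NULL on NOMATCH)
+ //   fullOuterSmallTableKeyMapping: key 0->k  (deserialize unmatched Small Table keys)
+ //   smallTableValueMapping:        value 0->v)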
int scratchColumn = vContext.allocateScratchColumn(typeInfo); projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo); - smallTableMapping.add(smallTableValueIndex, scratchColumn, typeInfo); + smallTableValueMapping.add(smallTableValueIndex, scratchColumn, typeInfo); } nextOutputColumn++; } } else if (smallTableRetainSize > 0) { smallTableOutputCount = smallTableRetainSize; - bigTableRetainedNames = new String[smallTableOutputCount]; // Only small table values appear in join output result. @@ -3402,21 +3507,24 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi smallTableExprVectorizes = false; } - bigTableRetainedNames[i] = smallTableExprNode.toString(); - // Make a new big table scratch column for the small table value. TypeInfo typeInfo = smallTableExprNode.getTypeInfo(); int scratchColumn = vContext.allocateScratchColumn(typeInfo); projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo); - smallTableMapping.add(smallTableValueIndex, scratchColumn, typeInfo); + smallTableValueMapping.add(smallTableValueIndex, scratchColumn, typeInfo); nextOutputColumn++; } - } else { - bigTableRetainedNames = new String[0]; } + Map> filterExpressions = desc.getFilters(); + VectorExpression[] bigTableFilterExpressions = + vContext.getVectorExpressions( + filterExpressions.get(posBigTable), + VectorExpressionDescriptor.Mode.FILTER); + vectorMapJoinInfo.setBigTableFilterExpressions(bigTableFilterExpressions); + boolean useOptimizedTable = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE); @@ -3472,15 +3580,23 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi // Convert dynamic arrays and maps to simple arrays. - bigTableRetainedMapping.finalize(); + bigTableRetainMapping.finalize(); + vectorMapJoinInfo.setBigTableRetainColumnMap(bigTableRetainMapping.getOutputColumns()); + vectorMapJoinInfo.setBigTableRetainTypeInfos(bigTableRetainMapping.getTypeInfos()); - bigTableOuterKeyMapping.finalize(); + nonOuterSmallTableKeyMapping.finalize(); + vectorMapJoinInfo.setNonOuterSmallTableKeyColumnMap(nonOuterSmallTableKeyMapping.getOutputColumns()); + vectorMapJoinInfo.setNonOuterSmallTableKeyTypeInfos(nonOuterSmallTableKeyMapping.getTypeInfos()); - smallTableMapping.finalize(); + outerSmallTableKeyMapping.finalize(); + fullOuterSmallTableKeyMapping.finalize(); - vectorMapJoinInfo.setBigTableRetainedMapping(bigTableRetainedMapping); - vectorMapJoinInfo.setBigTableOuterKeyMapping(bigTableOuterKeyMapping); - vectorMapJoinInfo.setSmallTableMapping(smallTableMapping); + vectorMapJoinInfo.setOuterSmallTableKeyMapping(outerSmallTableKeyMapping); + vectorMapJoinInfo.setFullOuterSmallTableKeyMapping(fullOuterSmallTableKeyMapping); + + smallTableValueMapping.finalize(); + + vectorMapJoinInfo.setSmallTableValueMapping(smallTableValueMapping); projectionMapping.finalize(); @@ -4111,8 +4227,6 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { return new ImmutablePair, String>(vectorOp, null); } - static int fake; - public static Operator vectorizeSelectOperator( Operator selectOp, VectorizationContext vContext, VectorSelectDesc vectorSelectDesc) diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java index 105ef08..73882d2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java @@ -47,6 +47,7 @@ private boolean vectorization = false; private boolean 
vectorizationOnly = false; private VectorizationDetailLevel vectorizationDetailLevel = VectorizationDetailLevel.SUMMARY; + private boolean debug = false; private Path explainRootPath; private Map opIdToRuntimeNumRows; @@ -137,6 +138,14 @@ public void setVectorizationDetailLevel(VectorizationDetailLevel vectorizationDe this.vectorizationDetailLevel = vectorizationDetailLevel; } + public boolean isDebug() { + return debug; + } + + public void setDebug(boolean debug) { + this.debug = debug; + } + public Path getExplainRootPath() { return explainRootPath; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java index 63b13c8..75ba3af 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java @@ -111,6 +111,8 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { i++; } } + } else if (explainOptions == HiveParser.KW_DEBUG) { + config.setDebug(true); } else { // UNDONE: UNKNOWN OPTION? } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index a1ec96c..f70fc67 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -187,6 +187,7 @@ KW_FILE: 'FILE'; KW_JAR: 'JAR'; KW_EXPLAIN: 'EXPLAIN'; KW_EXTENDED: 'EXTENDED'; +KW_DEBUG: 'DEBUG'; KW_FORMATTED: 'FORMATTED'; KW_DEPENDENCY: 'DEPENDENCY'; KW_LOGICAL: 'LOGICAL'; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index 3abc752..8a90c8f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -558,6 +558,7 @@ import org.apache.hadoop.hive.conf.HiveConf; xlateMap.put("KW_FUNCTION", "FUNCTION"); xlateMap.put("KW_EXPLAIN", "EXPLAIN"); xlateMap.put("KW_EXTENDED", "EXTENDED"); + xlateMap.put("KW_DEBUG", "DEBUG"); xlateMap.put("KW_SERDE", "SERDE"); xlateMap.put("KW_WITH", "WITH"); xlateMap.put("KW_SERDEPROPERTIES", "SERDEPROPERTIES"); @@ -789,6 +790,7 @@ explainOption | KW_ANALYZE | KW_REOPTIMIZATION | (KW_VECTORIZATION vectorizationOnly? vectorizatonDetail?) 
+ | KW_DEBUG ; vectorizationOnly diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index 2bba33f..0e6006c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -828,6 +828,7 @@ nonReserved | KW_OPERATOR | KW_EXPRESSION | KW_DETAIL + | KW_DEBUG | KW_WAIT | KW_ZONE | KW_TIMESTAMPTZ diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java index da30243..030bb61 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java @@ -28,7 +28,7 @@ @Retention(RetentionPolicy.RUNTIME) public @interface Explain { public enum Level { - USER, DEFAULT, EXTENDED; + USER, DEFAULT, EXTENDED, DEBUG; public boolean in(Level[] levels) { for (Level level : levels) { if (level.equals(this)) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java index cde7852..a854819 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java @@ -130,6 +130,10 @@ public VectorizationDetailLevel isVectorizationDetailLevel() { return config.getVectorizationDetailLevel(); } + public boolean isDebug() { + return config.isDebug(); + } + public ParseContext getParseContext() { return pCtx; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/JoinCondDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/JoinCondDesc.java index ea22131..acbbba9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/JoinCondDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/JoinCondDesc.java @@ -125,7 +125,7 @@ public String getJoinCondString() { sb.append("Inner Join "); break; case JoinDesc.FULL_OUTER_JOIN: - sb.append("Outer Join "); + sb.append("Full Outer Join "); break; case JoinDesc.LEFT_OUTER_JOIN: sb.append("Left Outer Join "); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java index 5b7f4c3..32f3746 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java @@ -291,7 +291,7 @@ public void setExprs(final Map> exprs) { first = false; sb.append("{"); - sb.append(expr.getExprString()); + sb.append(expr == null ? 
"NULL" : expr.getExprString()); sb.append("}"); } } @@ -560,6 +560,9 @@ public void setFilterMap(int[][] filterMap) { return null; } filterMap = compactFilter(filterMap); + if (filterMap == null) { + return null; + } Map result = new LinkedHashMap(); for (int i = 0 ; i < filterMap.length; i++) { if (filterMap[i] == null) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java index 91ea159..c39a33f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java @@ -33,11 +33,16 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; import org.apache.hadoop.hive.ql.optimizer.signature.Signature; import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; +import org.apache.hadoop.hive.serde2.AbstractSerDe; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hive.common.util.ReflectionUtil; /** * Map Join operator Descriptor implementation. @@ -82,6 +87,7 @@ private boolean isHybridHashJoin; private boolean isDynamicPartitionHashJoin = false; + private boolean isFullOuterIntersect = false; public MapJoinDesc() { bigTableBucketNumMapping = new LinkedHashMap(); @@ -92,6 +98,7 @@ public MapJoinDesc(MapJoinDesc clone) { this.keys = clone.keys; this.keyTblDesc = clone.keyTblDesc; this.valueTblDescs = clone.valueTblDescs; + this.valueFilteredTblDescs = clone.valueFilteredTblDescs; this.posBigTable = clone.posBigTable; this.valueIndices = clone.valueIndices; this.retainList = clone.retainList; @@ -211,6 +218,16 @@ public void setDumpFilePrefix(String dumpFilePrefix) { this.dumpFilePrefix = dumpFilePrefix; } + // NOTE: Debugging only. + @Explain(displayName = "keyExpressions", explainLevels = { Level.DEBUG }) + public Map getKeyExpressionString() { + Map keyMap = new LinkedHashMap(); + for (Map.Entry> k: getKeys().entrySet()) { + keyMap.put(k.getKey(), k.getValue().toString()); + } + return keyMap; + } + /** * @return the keys in string form */ @@ -296,6 +313,60 @@ public void setValueFilteredTblDescs(List valueFilteredTblDescs) { return valueTblDescs; } + // NOTE: Debugging only. + @Explain(displayName = "keyContext", explainLevels = { Level.DEBUG }) + public String getDebugKeyContext() { + MapJoinObjectSerDeContext keyContext; + try { + AbstractSerDe keySerde = + (AbstractSerDe) ReflectionUtil.newInstance( + keyTblDesc.getDeserializerClass(), null); + SerDeUtils.initializeSerDe(keySerde, null, keyTblDesc.getProperties(), null); + keyContext = new MapJoinObjectSerDeContext(keySerde, false); + } catch (SerDeException e) { + return null; + } + return keyContext.stringify(); + } + + private boolean hasFilter(int alias, int[][] filterMaps) { + return filterMaps != null && filterMaps[alias] != null; + } + + // NOTE: Debugging only. + @Explain(displayName = "valueContexts", explainLevels = { Level.DEBUG }) + public String getDebugValueContext() { + List valueContextStringList = new ArrayList(); + try { + boolean noOuterJoin = getNoOuterJoin(); + // Order in which the results should be output. 
+ Byte[] order = getTagOrder(); + int[][] filterMaps = getFilterMap(); + + for (int pos = 0; pos < order.length; pos++) { + if (pos == posBigTable) { + continue; + } + TableDesc valueTableDesc; + if (noOuterJoin) { + valueTableDesc = getValueTblDescs().get(pos); + } else { + valueTableDesc = getValueFilteredTblDescs().get(pos); + } + AbstractSerDe valueSerDe = + (AbstractSerDe) ReflectionUtil.newInstance( + valueTableDesc.getDeserializerClass(), null); + SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null); + MapJoinObjectSerDeContext valueContext = + new MapJoinObjectSerDeContext(valueSerDe, hasFilter(pos, filterMaps)); + valueContextStringList.add(pos + ":" + valueContext.stringify()); + } + } catch (SerDeException e) { + return null; + } + return valueContextStringList.toString(); + } + /** * @param valueTblDescs * the valueTblDescs to set @@ -383,6 +454,8 @@ public boolean getGenJoinKeys() { return genJoinKeys; } + @Explain(displayName = "DynamicPartitionHashJoin", explainLevels = { Level.USER, Level.DEFAULT, + Level.EXTENDED }, displayOnlyOnTrue = true) public boolean isDynamicPartitionHashJoin() { return isDynamicPartitionHashJoin; } @@ -391,6 +464,35 @@ public void setDynamicPartitionHashJoin(boolean isDistributedHashJoin) { this.isDynamicPartitionHashJoin = isDistributedHashJoin; } + @Explain(displayName = "FullOuterIntersect", explainLevels = { Level.USER, Level.DEFAULT, + Level.EXTENDED }, displayOnlyOnTrue = true) + public boolean isFullOuterIntersect() { + return isFullOuterIntersect; + } + + public void setFullOuterIntersect(boolean isFullOuterIntersect) { + this.isFullOuterIntersect = isFullOuterIntersect; + } + + // NOTE: Debugging only. + @Explain(displayName = "outer filter mappings", explainLevels = { Level.DEBUG }) + public String getDebugOuterFilterMapString() { + if (conds.length != 1) { + return null; + } + JoinCondDesc cond = conds[0]; + if (cond.getType() != JoinDesc.FULL_OUTER_JOIN && + cond.getType() != JoinDesc.LEFT_OUTER_JOIN && + cond.getType() != JoinDesc.RIGHT_OUTER_JOIN) { + return null; + } + int[][] fm = getFilterMap(); + if (fm == null) { + return null; + } + return Arrays.deepToString(fm); + } + // Use LinkedHashSet to give predictable display order. private static final Set vectorizableMapJoinNativeEngines = new LinkedHashSet(Arrays.asList("tez", "spark")); @@ -406,7 +508,9 @@ public void setDynamicPartitionHashJoin(boolean isDistributedHashJoin) { public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, VectorMapJoinDesc vectorMapJoinDesc) { // VectorMapJoinOperator is not native vectorized. 
- super(vectorMapJoinDesc, vectorMapJoinDesc.getHashTableImplementationType() != HashTableImplementationType.NONE); + super( + vectorMapJoinDesc, + vectorMapJoinDesc.getHashTableImplementationType() != HashTableImplementationType.NONE); this.mapJoinDesc = mapJoinDesc; this.vectorMapJoinDesc = vectorMapJoinDesc; vectorMapJoinInfo = @@ -419,7 +523,8 @@ public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, String engine = vectorMapJoinDesc.getEngine(); String engineInSupportedCondName = - HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + vectorizableMapJoinNativeEngines; + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + + engine + " IN " + vectorizableMapJoinNativeEngines; boolean engineInSupported = vectorizableMapJoinNativeEngines.contains(engine); boolean isFastHashTableEnabled = vectorMapJoinDesc.getIsFastHashTableEnabled(); @@ -474,7 +579,8 @@ public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, return conditions; } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getNativeConditionsMet() { if (nativeConditions == null) { nativeConditions = createNativeConditions(); @@ -482,7 +588,8 @@ public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, return VectorizationCondition.getConditionsMet(nativeConditions); } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsNotMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getNativeConditionsNotMet() { if (nativeConditions == null) { nativeConditions = createNativeConditions(); @@ -490,7 +597,8 @@ public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, return VectorizationCondition.getConditionsNotMet(nativeConditions); } - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableKeyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "bigTableKeyExpressions", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getBigTableKeyExpressions() { return vectorExpressionsToStringList( isNative ? 
@@ -498,8 +606,18 @@ public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, vectorMapJoinDesc.getAllBigTableKeyExpressions()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableKeyColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getBigTableKeyColumnNums() { + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "hashTableImplementationType", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String hashTableImplementationType() { + if (!isNative) { + return null; + } + return vectorMapJoinDesc.getHashTableImplementationType().name(); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableKeyColumns", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getBigTableKeyColumns() { if (!isNative) { return null; } @@ -507,10 +625,13 @@ public String getBigTableKeyColumnNums() { if (bigTableKeyColumnMap.length == 0) { return null; } - return Arrays.toString(bigTableKeyColumnMap); + return outputColumnsAndTypesToStringList( + vectorMapJoinInfo.getBigTableKeyColumnMap(), + vectorMapJoinInfo.getBigTableKeyTypeInfos()); } - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableValueExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableValueExpressions", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getBigTableValueExpressions() { return vectorExpressionsToStringList( isNative ? @@ -518,8 +639,18 @@ public String getBigTableKeyColumnNums() { vectorMapJoinDesc.getAllBigTableValueExpressions()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableValueColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getBigTableValueColumnNums() { + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableFilterExpressions", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getBigTableFilterExpressions() { + if (!isNative) { + return null; + } + return vectorExpressionsToStringList(vectorMapJoinInfo.getBigTableFilterExpressions()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableValueColumns", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getBigTableValueColumns() { if (!isNative) { return null; } @@ -527,48 +658,78 @@ public String getBigTableValueColumnNums() { if (bigTableValueColumnMap.length == 0) { return null; } - return Arrays.toString(bigTableValueColumnMap); + return outputColumnsAndTypesToStringList( + vectorMapJoinInfo.getBigTableValueColumnMap(), + vectorMapJoinInfo.getBigTableValueTypeInfos()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "smallTableMapping", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getSmallTableColumns() { + @Explain(vectorization = Vectorization.DETAIL, displayName = "smallTableValueMapping", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getSmallTableColumns() { if (!isNative) { return null; } - return outputColumnsToStringList(vectorMapJoinInfo.getSmallTableMapping()); + return outputColumnsAndTypesToStringList(vectorMapJoinInfo.getSmallTableValueMapping()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "projectedOutputColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getProjectedOutputColumnNums() { + @Explain(vectorization = Vectorization.DETAIL, displayName = "projectedOutput", + 
explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getProjectedOutputColumnNums() { if (!isNative) { return null; } - return outputColumnsToStringList(vectorMapJoinInfo.getProjectionMapping()); + return outputColumnsAndTypesToStringList(vectorMapJoinInfo.getProjectionMapping()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableOuterKeyMapping", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List getBigTableOuterKey() { - if (!isNative || vectorMapJoinDesc.getVectorMapJoinVariation() != VectorMapJoinVariation.OUTER) { + @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableRetainColumnNums", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getBigTableRetainedColumnNums() { + if (!isNative) { return null; } - return columnMappingToStringList(vectorMapJoinInfo.getBigTableOuterKeyMapping()); + return Arrays.toString(vectorMapJoinInfo.getBigTableRetainColumnMap()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableRetainedColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getBigTableRetainedColumnNums() { - if (!isNative) { + @Explain(vectorization = Vectorization.DETAIL, displayName = "nonOuterSmallTableKeyMapping", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getNonOuterSmallTableKeyMapping() { + if (!isNative || + (vectorMapJoinDesc.getVectorMapJoinVariation() == VectorMapJoinVariation.OUTER || + vectorMapJoinDesc.getVectorMapJoinVariation() == VectorMapJoinVariation.FULL_OUTER)) { + return null; + } + return Arrays.toString(vectorMapJoinInfo.getNonOuterSmallTableKeyColumnMap()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "outerSmallTableKeyMapping", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getOuterSmallTableKeyMapping() { + if (!isNative || + vectorMapJoinDesc.getVectorMapJoinVariation() != VectorMapJoinVariation.OUTER) { + return null; + } + return columnMappingToStringList(vectorMapJoinInfo.getOuterSmallTableKeyMapping()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "fullOuterSmallTableKeyMapping", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getFullOuterSmallTableKeyMapping() { + if (!isNative || + vectorMapJoinDesc.getVectorMapJoinVariation() != VectorMapJoinVariation.FULL_OUTER) { return null; } - return outputColumnsToStringList(vectorMapJoinInfo.getBigTableRetainedMapping()); + return columnMappingToStringList(vectorMapJoinInfo.getFullOuterSmallTableKeyMapping()); } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeNotSupportedKeyTypes", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeNotSupportedKeyTypes", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getNativeNotSupportedKeyTypes() { return vectorMapJoinDesc.getNotSupportedKeyTypes(); } } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "Map Join Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Map Join Vectorization", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public MapJoinOperatorExplainVectorization getMapJoinVectorization() { VectorMapJoinDesc vectorMapJoinDesc = (VectorMapJoinDesc) getVectorDesc(); if (vectorMapJoinDesc == null || this instanceof SMBJoinDesc) { @@ -592,7 +753,8 @@ public SMBJoinOperatorExplainVectorization(SMBJoinDesc 
smbJoinDesc, } // Handle dual nature. - @Explain(vectorization = Vectorization.OPERATOR, displayName = "SMB Map Join Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "SMB Map Join Vectorization", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public SMBJoinOperatorExplainVectorization getSMBJoinVectorization() { VectorSMBJoinDesc vectorSMBJoinDesc = (VectorSMBJoinDesc) getVectorDesc(); if (vectorSMBJoinDesc == null || !(this instanceof SMBJoinDesc)) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java index b0ae64a..aa82d30 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java @@ -29,7 +29,9 @@ import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.plan.BaseWork.BaseExplainVectorization; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.mapred.JobConf; public class MergeJoinWork extends BaseWork { @@ -175,8 +177,63 @@ public void setLlapMode(boolean llapMode) { public boolean getLlapMode() { return getMainWork().getLlapMode(); } - + public void addDummyOp(HashTableDummyOperator dummyOp) { getMainWork().addDummyOp(dummyOp); } + + /** + * For now, this class just says in EXPLAIN VECTORIZATION we don't support vectorization of the + * Merge Join vertex instead of being silent about it. + */ + public class MergeJoinExplainVectorization extends BaseExplainVectorization { + + private final MergeJoinWork mergeJoinWork; + + private VectorizationCondition[] mergeWorkVectorizationConditions; + + public MergeJoinExplainVectorization(MergeJoinWork mergeJoinWork) { + super(mergeJoinWork); + this.mergeJoinWork = mergeJoinWork; + } + + private VectorizationCondition[] createMergeWorkExplainVectorizationConditions() { + + boolean enabled = false; + + VectorizationCondition[] conditions = new VectorizationCondition[] { + new VectorizationCondition( + enabled, + "Vectorizing MergeJoin Supported") + }; + return conditions; + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enableConditionsMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getEnableConditionsMet() { + if (mergeWorkVectorizationConditions == null) { + mergeWorkVectorizationConditions = createMergeWorkExplainVectorizationConditions(); + } + return VectorizationCondition.getConditionsMet(mergeWorkVectorizationConditions); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enableConditionsNotMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getEnableConditionsNotMet() { + if (mergeWorkVectorizationConditions == null) { + mergeWorkVectorizationConditions = createMergeWorkExplainVectorizationConditions(); + } + return VectorizationCondition.getConditionsNotMet(mergeWorkVectorizationConditions); + } + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "MergeJoin Vectorization", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public MergeJoinExplainVectorization getReduceExplainVectorization() { + if (!getVectorizationExamined()) { + return null; + } + return new MergeJoinExplainVectorization(this); + } } diff --git 
ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java index 446b810..5439e14 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java @@ -59,6 +59,25 @@ public String outputColumnsToStringList(VectorColumnMapping vectorColumnMapping) return Arrays.toString(outputColumns); } + public List outputColumnsAndTypesToStringList(int[] outputColumns, TypeInfo[] typeInfos) { + final int size = outputColumns.length; + ArrayList result = new ArrayList(size); + for (int i = 0; i < size; i++) { + result.add(outputColumns[i] + ":" + typeInfos[i].toString()); + } + return result; + } + + public List outputColumnsAndTypesToStringList(VectorColumnMapping vectorColumnMapping) { + final int size = vectorColumnMapping.getCount(); + if (size == 0) { + return null; + } + int[] outputColumns = vectorColumnMapping.getOutputColumns(); + TypeInfo[] typeInfos = vectorColumnMapping.getTypeInfos(); + return outputColumnsAndTypesToStringList(outputColumns, typeInfos); + } + public List columnMappingToStringList(VectorColumnMapping vectorColumnMapping) { final int size = vectorColumnMapping.getCount(); if (size == 0) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java index f2955af..04868b0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.optimizer.signature.Signature; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc.ReduceSinkKeyType; @@ -188,6 +189,17 @@ public Object clone() { return outputKeyColumnNames; } + // NOTE: Debugging only. + @Explain(displayName = "output key column names", explainLevels = { Level.DEBUG }) + public List getOutputKeyColumnNamesDisplay() { + List result = new ArrayList(); + for (String name : outputKeyColumnNames) { + result.add(Utilities.ReduceField.KEY.name() + "." + name); + } + return result; + } + + public void setOutputKeyColumnNames( java.util.ArrayList outputKeyColumnNames) { this.outputKeyColumnNames = outputKeyColumnNames; @@ -197,6 +209,16 @@ public void setOutputKeyColumnNames( return outputValueColumnNames; } + // NOTE: Debugging only. + @Explain(displayName = "output value column names", explainLevels = { Level.DEBUG }) + public List getOutputValueColumnNamesDisplay() { + List result = new ArrayList(); + for (String name : outputValueColumnNames) { + result.add(Utilities.ReduceField.VALUE.name() + "." 
+ name); + } + return result; + } + public void setOutputValueColumnNames( java.util.ArrayList outputValueColumnNames) { this.outputValueColumnNames = outputValueColumnNames; @@ -536,34 +558,41 @@ public ReduceSinkOperatorExplainVectorization(ReduceSinkDesc reduceSinkDesc, return vectorExpressionsToStringList(vectorReduceSinkInfo.getReduceSinkValueExpressions()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "keyColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getKeyColumnNums() { + @Explain(vectorization = Vectorization.DETAIL, displayName = "keyColumns", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getKeyColumns() { if (!isNative) { return null; } int[] keyColumnMap = vectorReduceSinkInfo.getReduceSinkKeyColumnMap(); if (keyColumnMap == null) { // Always show an array. - keyColumnMap = new int[0]; + return new ArrayList(); } - return Arrays.toString(keyColumnMap); + return outputColumnsAndTypesToStringList( + vectorReduceSinkInfo.getReduceSinkKeyColumnMap(), + vectorReduceSinkInfo.getReduceSinkKeyTypeInfos()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "valueColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getValueColumnNums() { + @Explain(vectorization = Vectorization.DETAIL, displayName = "valueColumns", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getValueColumns() { if (!isNative) { return null; } int[] valueColumnMap = vectorReduceSinkInfo.getReduceSinkValueColumnMap(); if (valueColumnMap == null) { // Always show an array. - valueColumnMap = new int[0]; + return new ArrayList(); } - return Arrays.toString(valueColumnMap); + return outputColumnsAndTypesToStringList( + vectorReduceSinkInfo.getReduceSinkValueColumnMap(), + vectorReduceSinkInfo.getReduceSinkValueTypeInfos()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "bucketColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getBucketColumnNums() { + @Explain(vectorization = Vectorization.DETAIL, displayName = "bucketColumns", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getBucketColumns() { if (!isNative) { return null; } @@ -572,11 +601,14 @@ public String getBucketColumnNums() { // Suppress empty column map. return null; } - return Arrays.toString(bucketColumnMap); + return outputColumnsAndTypesToStringList( + vectorReduceSinkInfo.getReduceSinkBucketColumnMap(), + vectorReduceSinkInfo.getReduceSinkBucketTypeInfos()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "partitionColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getPartitionColumnNums() { + @Explain(vectorization = Vectorization.DETAIL, displayName = "partitionColumns", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getPartitionColumns() { if (!isNative) { return null; } @@ -585,7 +617,9 @@ public String getPartitionColumnNums() { // Suppress empty column map. 
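+ // (Rendering example for the new column:type lists produced by
+ // outputColumnsAndTypesToStringList, with illustrative inputs:
+ //   outputColumnsAndTypesToStringList(new int[] { 0, 2 },
+ //       new TypeInfo[] { TypeInfoFactory.longTypeInfo, TypeInfoFactory.stringTypeInfo })
+ // returns ["0:bigint", "2:string"], where the old *ColumnNums rendering was [0, 2].)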
return null; } - return Arrays.toString(partitionColumnMap); + return outputColumnsAndTypesToStringList( + vectorReduceSinkInfo.getReduceSinkPartitionColumnMap(), + vectorReduceSinkInfo.getReduceSinkPartitionTypeInfos()); } private VectorizationCondition[] createNativeConditions() { @@ -594,7 +628,8 @@ public String getPartitionColumnNums() { String engine = vectorReduceSinkDesc.getEngine(); String engineInSupportedCondName = - HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + vectorizableReduceSinkNativeEngines; + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + + vectorizableReduceSinkNativeEngines; boolean engineInSupported = vectorizableReduceSinkNativeEngines.contains(engine); VectorizationCondition[] conditions = new VectorizationCondition[] { @@ -629,7 +664,8 @@ public String getPartitionColumnNums() { return conditions; } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getNativeConditionsMet() { if (nativeConditions == null) { nativeConditions = createNativeConditions(); @@ -637,7 +673,8 @@ public String getPartitionColumnNums() { return VectorizationCondition.getConditionsMet(nativeConditions); } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsNotMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getNativeConditionsNotMet() { if (nativeConditions == null) { nativeConditions = createNativeConditions(); @@ -646,7 +683,8 @@ public String getPartitionColumnNums() { } } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "Reduce Sink Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Reduce Sink Vectorization", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public ReduceSinkOperatorExplainVectorization getReduceSinkVectorization() { VectorReduceSinkDesc vectorReduceSinkDesc = (VectorReduceSinkDesc) getVectorDesc(); if (vectorReduceSinkDesc == null) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java index 58032ca..a8f045c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java @@ -85,11 +85,11 @@ public PrimitiveTypeInfo getPrimitiveTypeInfo() { } public static enum VectorMapJoinVariation { - NONE, - INNER_BIG_ONLY, INNER, + INNER_BIG_ONLY, LEFT_SEMI, - OUTER + OUTER, + FULL_OUTER } private HashTableImplementationType hashTableImplementationType; @@ -107,7 +107,7 @@ public VectorMapJoinDesc() { hashTableImplementationType = HashTableImplementationType.NONE; hashTableKind = HashTableKind.NONE; hashTableKeyType = HashTableKeyType.NONE; - vectorMapJoinVariation = VectorMapJoinVariation.NONE; + vectorMapJoinVariation = null; minMaxEnabled = false; allBigTableKeyExpressions = null; @@ -206,6 +206,7 @@ public VectorMapJoinInfo getVectorMapJoinInfo() { private List notSupportedKeyTypes; private boolean smallTableExprVectorizes; private boolean outerJoinHasNoKeys; + boolean isSaveNullKeyValuesForFullOuter; public void 
setUseOptimizedTable(boolean useOptimizedTable) { this.useOptimizedTable = useOptimizedTable; @@ -274,5 +275,10 @@ public void setIsHybridHashJoin(boolean isHybridHashJoin) { public boolean getIsHybridHashJoin() { return isHybridHashJoin; } - + public void setIsSaveNullKeyValuesForFullOuter(boolean isSaveNullKeyValuesForFullOuter) { + this.isSaveNullKeyValuesForFullOuter = isSaveNullKeyValuesForFullOuter; + } + public boolean getIsSaveNullKeyValuesForFullOuter() { + return isSaveNullKeyValuesForFullOuter; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java index 6db0540..ad82e5c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java @@ -48,9 +48,19 @@ private TypeInfo[] bigTableValueTypeInfos; private VectorExpression[] slimmedBigTableValueExpressions; - private VectorColumnOutputMapping bigTableRetainedMapping; - private VectorColumnOutputMapping bigTableOuterKeyMapping; - private VectorColumnSourceMapping smallTableMapping; + private VectorExpression[] bigTableFilterExpressions; + + private int[] bigTableRetainColumnMap; + private TypeInfo[] bigTableRetainTypeInfos; + + private int[] nonOuterSmallTableKeyColumnMap; + private TypeInfo[] nonOuterSmallTableKeyTypeInfos; + + private VectorColumnOutputMapping outerSmallTableKeyMapping; + + private VectorColumnSourceMapping fullOuterSmallTableKeyMapping; + + private VectorColumnSourceMapping smallTableValueMapping; private VectorColumnSourceMapping projectionMapping; @@ -65,9 +75,19 @@ public VectorMapJoinInfo() { bigTableValueTypeInfos = null; slimmedBigTableValueExpressions = null; - bigTableRetainedMapping = null; - bigTableOuterKeyMapping = null; - smallTableMapping = null; + bigTableFilterExpressions = null; + + bigTableRetainColumnMap = null; + bigTableRetainTypeInfos = null; + + nonOuterSmallTableKeyColumnMap = null; + nonOuterSmallTableKeyTypeInfos = null; + + outerSmallTableKeyMapping = null; + + fullOuterSmallTableKeyMapping = null; + + smallTableValueMapping = null; projectionMapping = null; } @@ -138,28 +158,69 @@ public void setSlimmedBigTableValueExpressions( this.slimmedBigTableValueExpressions = slimmedBigTableValueExpressions; } - public void setBigTableRetainedMapping(VectorColumnOutputMapping bigTableRetainedMapping) { - this.bigTableRetainedMapping = bigTableRetainedMapping; + public VectorExpression[] getBigTableFilterExpressions() { + return bigTableFilterExpressions; + } + + public void setBigTableFilterExpressions(VectorExpression[] bigTableFilterExpressions) { + this.bigTableFilterExpressions = bigTableFilterExpressions; + } + + public void setBigTableRetainColumnMap(int[] bigTableRetainColumnMap) { + this.bigTableRetainColumnMap = bigTableRetainColumnMap; + } + + public int[] getBigTableRetainColumnMap() { + return bigTableRetainColumnMap; + } + + public void setBigTableRetainTypeInfos(TypeInfo[] bigTableRetainTypeInfos) { + this.bigTableRetainTypeInfos = bigTableRetainTypeInfos; + } + + public TypeInfo[] getBigTableRetainTypeInfos() { + return bigTableRetainTypeInfos; + } + + public void setNonOuterSmallTableKeyColumnMap(int[] nonOuterSmallTableKeyColumnMap) { + this.nonOuterSmallTableKeyColumnMap = nonOuterSmallTableKeyColumnMap; + } + + public int[] getNonOuterSmallTableKeyColumnMap() { + return nonOuterSmallTableKeyColumnMap; + } + + public void setNonOuterSmallTableKeyTypeInfos(TypeInfo[] nonOuterSmallTableKeyTypeInfos) { + 
this.nonOuterSmallTableKeyTypeInfos = nonOuterSmallTableKeyTypeInfos; + } + + public TypeInfo[] getNonOuterSmallTableKeyTypeInfos() { + return nonOuterSmallTableKeyTypeInfos; + } + + public void setOuterSmallTableKeyMapping(VectorColumnOutputMapping outerSmallTableKeyMapping) { + this.outerSmallTableKeyMapping = outerSmallTableKeyMapping; } - public VectorColumnOutputMapping getBigTableRetainedMapping() { - return bigTableRetainedMapping; + public VectorColumnOutputMapping getOuterSmallTableKeyMapping() { + return outerSmallTableKeyMapping; } - public void setBigTableOuterKeyMapping(VectorColumnOutputMapping bigTableOuterKeyMapping) { - this.bigTableOuterKeyMapping = bigTableOuterKeyMapping; + public void setFullOuterSmallTableKeyMapping( + VectorColumnSourceMapping fullOuterSmallTableKeyMapping) { + this.fullOuterSmallTableKeyMapping = fullOuterSmallTableKeyMapping; } - public VectorColumnOutputMapping getBigTableOuterKeyMapping() { - return bigTableOuterKeyMapping; + public VectorColumnSourceMapping getFullOuterSmallTableKeyMapping() { + return fullOuterSmallTableKeyMapping; } - public void setSmallTableMapping(VectorColumnSourceMapping smallTableMapping) { - this.smallTableMapping = smallTableMapping; + public void setSmallTableValueMapping(VectorColumnSourceMapping smallTableValueMapping) { + this.smallTableValueMapping = smallTableValueMapping; } - public VectorColumnSourceMapping getSmallTableMapping() { - return smallTableMapping; + public VectorColumnSourceMapping getSmallTableValueMapping() { + return smallTableValueMapping; } public void setProjectionMapping(VectorColumnSourceMapping projectionMapping) { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java index 9f785e6..e5c749f 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java @@ -84,9 +84,9 @@ public void testGetNonExistent() throws Exception { map.put(kv2, -1); key[0] = (byte)(key[0] + 1); BytesBytesMultiHashMap.Result hashMapResult = new BytesBytesMultiHashMap.Result(); - map.getValueResult(key, 0, key.length, hashMapResult); + map.getValueResult(key, 0, key.length, hashMapResult, null); assertTrue(!hashMapResult.hasRows()); - map.getValueResult(key, 0, 0, hashMapResult); + map.getValueResult(key, 0, 0, hashMapResult, null); assertTrue(!hashMapResult.hasRows()); } @@ -104,7 +104,7 @@ public void testPutWithFullMap() throws Exception { assertEquals(CAPACITY, map.getCapacity()); // Get of non-existent key should terminate.. BytesBytesMultiHashMap.Result hashMapResult = new BytesBytesMultiHashMap.Result(); - map.getValueResult(new byte[0], 0, 0, hashMapResult); + map.getValueResult(new byte[0], 0, 0, hashMapResult, null); } @Test @@ -123,7 +123,7 @@ public void testExpand() throws Exception { private void verifyHashMapResult(BytesBytesMultiHashMap map, byte[] key, byte[]... 
values) { BytesBytesMultiHashMap.Result hashMapResult = new BytesBytesMultiHashMap.Result(); - byte state = map.getValueResult(key, 0, key.length, hashMapResult); + byte state = map.getValueResult(key, 0, key.length, hashMapResult, null); HashSet hs = new HashSet(); int count = 0; if (hashMapResult.hasRows()) { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/CollectorTestOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/CollectorTestOperator.java index 6491d79..244208b 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/CollectorTestOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/CollectorTestOperator.java @@ -26,8 +26,22 @@ private static final long serialVersionUID = 1L; + private boolean isClosed; + private boolean isAborted; + public CollectorTestOperator() { super(); + + isClosed = false; + isAborted = false; + } + + public boolean getIsClosed() { + return isClosed; + } + + public boolean getIsAborted() { + return isAborted; } @Override @@ -36,6 +50,14 @@ public void process(Object row, int tag) throws HiveException { } @Override + public void closeOp(boolean abort) { + isClosed = true; + if (abort) { + isAborted = true; + } + } + + @Override public String getName() { return CollectorTestOperator.class.getSimpleName(); } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowCollectorTestOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowCollectorTestOperator.java index 18933d4..ce90a6d 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowCollectorTestOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowCollectorTestOperator.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.ql.exec.util.collectoroperator; +import java.util.ArrayList; +import java.util.List; + import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -28,19 +31,30 @@ private static final long serialVersionUID = 1L; private final ObjectInspector[] outputObjectInspectors; + private final int columnSize; public RowCollectorTestOperator(ObjectInspector[] outputObjectInspectors) { super(); this.outputObjectInspectors = outputObjectInspectors; + columnSize = outputObjectInspectors.length; } @Override public void process(Object row, int tag) throws HiveException { rowCount++; - Object[] rowObjectArray = (Object[]) row; - Object[] resultObjectArray = new Object[rowObjectArray.length]; - for (int c = 0; c < rowObjectArray.length; c++) { - resultObjectArray[c] = ((PrimitiveObjectInspector) outputObjectInspectors[c]).copyObject(rowObjectArray[c]); + Object[] resultObjectArray = new Object[columnSize]; + if (row instanceof ArrayList) { + List rowObjectList = (ArrayList) row; + for (int c = 0; c < columnSize; c++) { + resultObjectArray[c] = + ((PrimitiveObjectInspector) outputObjectInspectors[c]).copyObject(rowObjectList.get(c)); + } + } else { + Object[] rowObjectArray = (Object[]) row; + for (int c = 0; c < columnSize; c++) { + resultObjectArray[c] = + ((PrimitiveObjectInspector) outputObjectInspectors[c]).copyObject(rowObjectArray[c]); + } } nextTestRow(new RowTestObjects(resultObjectArray)); } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowVectorCollectorTestOperator.java 
ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowVectorCollectorTestOperator.java index 06cd1e9..a2f9f04 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowVectorCollectorTestOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowVectorCollectorTestOperator.java @@ -39,6 +39,16 @@ public RowVectorCollectorTestOperator(TypeInfo[] outputTypeInfos, vectorExtractRow.init(outputTypeInfos); } + public RowVectorCollectorTestOperator( + int[] outputProjectionColumnNums, + TypeInfo[] outputTypeInfos, + ObjectInspector[] outputObjectInspectors) throws HiveException { + super(); + this.outputObjectInspectors = outputObjectInspectors; + vectorExtractRow = new VectorExtractRow(); + vectorExtractRow.init(outputTypeInfos, outputProjectionColumnNums); + } + @Override public void process(Object row, int tag) throws HiveException { VectorizedRowBatch batch = (VectorizedRowBatch) row; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/util/rowobjects/RowTestObjectsMultiSet.java ql/src/test/org/apache/hadoop/hive/ql/exec/util/rowobjects/RowTestObjectsMultiSet.java index 51a5f8e..ec53a3d 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/util/rowobjects/RowTestObjectsMultiSet.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/util/rowobjects/RowTestObjectsMultiSet.java @@ -26,55 +26,129 @@ import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects; public class RowTestObjectsMultiSet { - private SortedMap sortedMap; - private int rowCount; - private int totalCount; + + public static enum RowFlag { + NONE (0), + REGULAR (0x01), + LEFT_OUTER (0x02), + FULL_OUTER (0x04); + + public final long value; + RowFlag(long value) { + this.value = value; + } + } + + private static class Value { + + // Mutable. 
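// Aside: a minimal sketch, assuming only the RowFlag bit values defined above
// (0x01, 0x02, 0x04); it is not part of the patch. It shows how the mutable
// rowFlags field declared just below accumulates flags with |= and how
// membership is tested with &, the same way displayRowFlags() does further
// down when it renders output such as "{REGULAR, FULL_OUTER}".
class RowFlagSketch {
    public static void main(String[] args) {
        long rowFlags = 0;   // RowFlag.NONE.value
        rowFlags |= 0x01;    // RowFlag.REGULAR.value
        rowFlags |= 0x04;    // RowFlag.FULL_OUTER.value
        System.out.println((rowFlags & 0x02) != 0); // false: LEFT_OUTER not seen
        System.out.println((rowFlags & 0x04) != 0); // true: FULL_OUTER seen
    }
}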
+ public int count; + public long rowFlags; + + public final int initialKeyCount; + public final int initialValueCount; + public final RowFlag initialRowFlag; + + public Value(int count, RowFlag rowFlag, int totalKeyCount, int totalValueCount) { + this.count = count; + this.rowFlags = rowFlag.value; + + initialKeyCount = totalKeyCount; + initialValueCount = totalValueCount; + initialRowFlag = rowFlag; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("count "); + sb.append(count); + return sb.toString(); + } + } + + private SortedMap sortedMap; + private int totalKeyCount; + private int totalValueCount; public RowTestObjectsMultiSet() { - sortedMap = new TreeMap(); - rowCount = 0; - totalCount = 0; + sortedMap = new TreeMap(); + totalKeyCount = 0; + totalValueCount = 0; } - public int getRowCount() { - return rowCount; + public int getTotalKeyCount() { + return totalKeyCount; } - public int getTotalCount() { - return totalCount; + public int getTotalValueCount() { + return totalValueCount; } - public void add(RowTestObjects testRow) { + public void add(RowTestObjects testRow, RowFlag rowFlag) { if (sortedMap.containsKey(testRow)) { - Integer count = sortedMap.get(testRow); - count++; + Value value = sortedMap.get(testRow); + value.count++; + value.rowFlags |= rowFlag.value; + totalValueCount++; } else { - sortedMap.put(testRow, 1); - rowCount++; + sortedMap.put(testRow, new Value(1, rowFlag, ++totalKeyCount, ++totalValueCount)); + } + + } + + public void add(RowTestObjects testRow, int count) { + if (sortedMap.containsKey(testRow)) { + throw new RuntimeException(); + } + sortedMap.put(testRow, new Value(count, RowFlag.NONE, ++totalKeyCount, ++totalValueCount)); + } + + public String displayRowFlags(long rowFlags) { + StringBuilder sb = new StringBuilder(); + sb.append("{"); + for (RowFlag rowFlag : RowFlag.values()) { + if ((rowFlags & rowFlag.value) != 0) { + if (sb.length() > 1) { + sb.append(", "); + } + sb.append(rowFlag.name()); + } } - totalCount++; + sb.append("}"); + return sb.toString(); } - public boolean verify(RowTestObjectsMultiSet other) { + public boolean verify(RowTestObjectsMultiSet other, String left, String right) { final int thisSize = this.sortedMap.size(); final int otherSize = other.sortedMap.size(); if (thisSize != otherSize) { - System.out.println("*VERIFY* count " + thisSize + " doesn't match otherSize " + otherSize); + System.out.println("*BENCHMARK* " + left + " count " + thisSize + " doesn't match " + right + " " + otherSize); return false; } - Iterator> thisIterator = this.sortedMap.entrySet().iterator(); - Iterator> otherIterator = other.sortedMap.entrySet().iterator(); + Iterator> thisIterator = this.sortedMap.entrySet().iterator(); + Iterator> otherIterator = other.sortedMap.entrySet().iterator(); for (int i = 0; i < thisSize; i++) { - Entry thisEntry = thisIterator.next(); - Entry otherEntry = otherIterator.next(); + Entry thisEntry = thisIterator.next(); + Entry otherEntry = otherIterator.next(); if (!thisEntry.getKey().equals(otherEntry.getKey())) { - System.out.println("*VERIFY* thisEntry.getKey() " + thisEntry.getKey() + " doesn't match otherEntry.getKey() " + otherEntry.getKey()); + System.out.println("*BENCHMARK* " + left + " row " + thisEntry.getKey().toString() + + " (rowFlags " + displayRowFlags(thisEntry.getValue().rowFlags) + + " count " + thisEntry.getValue().count + ")" + + " but found " + right + " row " + otherEntry.getKey().toString() + + " (initialKeyCount " + + 
otherEntry.getValue().initialKeyCount + + " initialValueCount " + otherEntry.getValue().initialValueCount + ")"); return false; } // Check multi-set count. - if (!thisEntry.getValue().equals(otherEntry.getValue())) { - System.out.println("*VERIFY* key " + thisEntry.getKey() + " count " + thisEntry.getValue() + " doesn't match " + otherEntry.getValue()); + if (thisEntry.getValue().count != otherEntry.getValue().count) { + System.out.println("*BENCHMARK* " + left + " row " + thisEntry.getKey().toString() + + " count " + thisEntry.getValue().count + + " (rowFlags " + displayRowFlags(thisEntry.getValue().rowFlags) + ")" + + " doesn't match " + right + " row count " + otherEntry.getValue().count + + " (initialKeyCount " + + otherEntry.getValue().initialKeyCount + + " initialValueCount " + otherEntry.getValue().initialValueCount + ")"); return false; } } @@ -84,6 +158,51 @@ public boolean verify(RowTestObjectsMultiSet other) { return true; } + public RowTestObjectsMultiSet subtract(RowTestObjectsMultiSet other) { + RowTestObjectsMultiSet result = new RowTestObjectsMultiSet(); + + Iterator> thisIterator = this.sortedMap.entrySet().iterator(); + while (thisIterator.hasNext()) { + Entry thisEntry = thisIterator.next(); + + if (other.sortedMap.containsKey(thisEntry.getKey())) { + Value thisValue = thisEntry.getValue(); + Value otherValue = other.sortedMap.get(thisEntry.getKey()); + if (thisValue.count == otherValue.count) { + continue; + } + } + result.add(thisEntry.getKey(), thisEntry.getValue().count); + } + + return result; + } + + public void displayDifferences(RowTestObjectsMultiSet other, String left, String right) { + + RowTestObjectsMultiSet leftOnly = this.subtract(other); + Iterator> leftOnlyIterator = + leftOnly.sortedMap.entrySet().iterator(); + while (leftOnlyIterator.hasNext()) { + Entry leftOnlyEntry = leftOnlyIterator.next(); + System.out.println( + "*BENCHMARK* " + left + " only row " + leftOnlyEntry.getKey().toString() + + " count " + leftOnlyEntry.getValue().count + + " (initialRowFlag " + leftOnlyEntry.getValue().initialRowFlag.name() + ")"); + } + + RowTestObjectsMultiSet rightOnly = other.subtract(this); + Iterator> rightOnlyIterator = + rightOnly.sortedMap.entrySet().iterator(); + while (rightOnlyIterator.hasNext()) { + Entry rightOnlyEntry = rightOnlyIterator.next(); + System.out.println( + "*BENCHMARK* " + right + " only row " + rightOnlyEntry.getKey().toString() + + " count " + rightOnlyEntry.getValue().count + + " (initialRowFlag " + rightOnlyEntry.getValue().initialRowFlag.name() + ")"); + } + } + @Override public String toString() { return sortedMap.toString(); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampWritableAndColumnVector.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampWritableAndColumnVector.java index f163289..85e5cb3 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampWritableAndColumnVector.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampWritableAndColumnVector.java @@ -35,8 +35,6 @@ private static int TEST_COUNT = 5000; - private static int fake = 0; - @Test public void testDouble() throws Exception { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java index 2d0c783..6ce63a4 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java @@ 
-62,9 +62,8 @@ public ValidatorVectorSelectOperator(CompilationOpContext ctx, OperatorDesc conf * Override forward to do validation */ @Override - public void forward(Object row, ObjectInspector rowInspector, boolean isVectorized) + public void vectorForward(VectorizedRowBatch vrg) throws HiveException { - VectorizedRowBatch vrg = (VectorizedRowBatch) row; int[] projections = vrg.projectedColumns; assertEquals(2, vrg.projectionSize); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java index 0514e3f..cb68dae 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java @@ -26,10 +26,14 @@ import java.util.Map; import java.util.Map.Entry; +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; @@ -37,31 +41,42 @@ import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator; +import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowCollectorTestOperatorBase; +import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowVectorCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects; +import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet; import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; +import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinBaseOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.JoinCondDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import 
org.apache.hadoop.hive.ql.plan.PlanUtils; +import org.apache.hadoop.hive.ql.plan.SelectDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; +import org.apache.hadoop.hive.ql.plan.VectorSelectDesc; import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; @@ -90,45 +105,209 @@ NATIVE_VECTOR_FAST } + public static boolean isVectorOutput(MapJoinTestImplementation mapJoinImplementation) { + return + (mapJoinImplementation != MapJoinTestImplementation.ROW_MODE_HASH_MAP && + mapJoinImplementation != MapJoinTestImplementation.ROW_MODE_OPTIMIZED); + } + + /* + * This test collector operator is for MapJoin row-mode. + */ + public static class TestMultiSetCollectorOperator extends RowCollectorTestOperator { + + private final RowTestObjectsMultiSet testRowMultiSet; + + public TestMultiSetCollectorOperator( + ObjectInspector[] outputObjectInspectors, + RowTestObjectsMultiSet testRowMultiSet) { + super(outputObjectInspectors); + this.testRowMultiSet = testRowMultiSet; + } + + public RowTestObjectsMultiSet getTestRowMultiSet() { + return testRowMultiSet; + } + + public void nextTestRow(RowTestObjects testRow) { + testRowMultiSet.add(testRow, RowTestObjectsMultiSet.RowFlag.NONE); + } + + @Override + public String getName() { + return TestMultiSetCollectorOperator.class.getSimpleName(); + } + } + + public static class TestMultiSetVectorCollectorOperator extends RowVectorCollectorTestOperator { + + private final RowTestObjectsMultiSet testRowMultiSet; + + public RowTestObjectsMultiSet getTestRowMultiSet() { + return testRowMultiSet; + } + + public TestMultiSetVectorCollectorOperator(TypeInfo[] outputTypeInfos, + ObjectInspector[] outputObjectInspectors, RowTestObjectsMultiSet testRowMultiSet) + throws HiveException { + super(outputTypeInfos, outputObjectInspectors); + this.testRowMultiSet = testRowMultiSet; + } + + public TestMultiSetVectorCollectorOperator( + int[] outputProjectionColumnNums, + TypeInfo[] outputTypeInfos, + ObjectInspector[] outputObjectInspectors, + RowTestObjectsMultiSet testRowMultiSet) throws HiveException { + super(outputProjectionColumnNums, outputTypeInfos, outputObjectInspectors); + this.testRowMultiSet = testRowMultiSet; + } + + public void nextTestRow(RowTestObjects testRow) { + testRowMultiSet.add(testRow, RowTestObjectsMultiSet.RowFlag.NONE); + } + + @Override + public String getName() { + return TestMultiSetVectorCollectorOperator.class.getSimpleName(); + } + } + public static MapJoinDesc createMapJoinDesc(MapJoinTestDescription testDesc) { + return createMapJoinDesc(testDesc, false); + } + + public static MapJoinDesc createMapJoinDesc(MapJoinTestDescription testDesc, + boolean isFullOuterIntersect) { MapJoinDesc mapJoinDesc = new MapJoinDesc(); + mapJoinDesc.setPosBigTable(0); - List keyExpr = new ArrayList(); + + List bigTableKeyExpr = new ArrayList(); for (int i = 0; i < testDesc.bigTableKeyColumnNums.length; i++) { - keyExpr.add(new ExprNodeColumnDesc(testDesc.bigTableKeyTypeInfos[i], 
testDesc.bigTableKeyColumnNames[i], "B", false)); + bigTableKeyExpr.add( + new ExprNodeColumnDesc( + testDesc.bigTableKeyTypeInfos[i], + testDesc.bigTableKeyColumnNames[i], "B", false)); } Map> keyMap = new HashMap>(); - keyMap.put((byte)0, keyExpr); + keyMap.put((byte) 0, bigTableKeyExpr); + + mapJoinDesc.setFullOuterIntersect(isFullOuterIntersect); + + // Big Table expression includes all columns -- keys and extra (value) columns. + // UNDONE: Assumes all values retained... + List bigTableExpr = new ArrayList(); + for (int i = 0; i < testDesc.bigTableColumnNames.length; i++) { + bigTableExpr.add( + new ExprNodeColumnDesc( + testDesc.bigTableTypeInfos[i], + testDesc.bigTableColumnNames[i], "B", false)); + } + + Map> exprMap = new HashMap>(); + exprMap.put((byte) 0, bigTableExpr); + + List smallTableKeyExpr = new ArrayList(); + + for (int i = 0; i < testDesc.smallTableKeyTypeInfos.length; i++) { + ExprNodeColumnDesc exprNodeColumnDesc = + new ExprNodeColumnDesc( + testDesc.smallTableKeyTypeInfos[i], + testDesc.smallTableKeyColumnNames[i], "S", false); + smallTableKeyExpr.add(exprNodeColumnDesc); + } + // Retained Small Table keys and values. List smallTableExpr = new ArrayList(); - for (int i = 0; i < testDesc.smallTableValueColumnNames.length; i++) { - smallTableExpr.add(new ExprNodeColumnDesc(testDesc.smallTableValueTypeInfos[i], testDesc.smallTableValueColumnNames[i], "S", false)); + final int smallTableRetainKeySize = testDesc.smallTableRetainKeyColumnNums.length; + for (int i = 0; i < smallTableRetainKeySize; i++) { + int smallTableKeyColumnNum = testDesc.smallTableRetainKeyColumnNums[i]; + smallTableExpr.add( + new ExprNodeColumnDesc( + testDesc.smallTableTypeInfos[smallTableKeyColumnNum], + testDesc.smallTableColumnNames[smallTableKeyColumnNum], "S", false)); + } + + final int smallTableRetainValueSize = testDesc.smallTableRetainValueColumnNums.length; + for (int i = 0; i < smallTableRetainValueSize; i++) { + int smallTableValueColumnNum = + smallTableRetainKeySize + testDesc.smallTableRetainValueColumnNums[i]; + smallTableExpr.add( + new ExprNodeColumnDesc( + testDesc.smallTableTypeInfos[smallTableValueColumnNum], + testDesc.smallTableColumnNames[smallTableValueColumnNum], "S", false)); } - keyMap.put((byte)1, smallTableExpr); + + keyMap.put((byte) 1, smallTableKeyExpr); + exprMap.put((byte) 1, smallTableExpr); mapJoinDesc.setKeys(keyMap); - mapJoinDesc.setExprs(keyMap); + mapJoinDesc.setExprs(exprMap); Byte[] order = new Byte[] {(byte) 0, (byte) 1}; mapJoinDesc.setTagOrder(order); - mapJoinDesc.setNoOuterJoin(testDesc.vectorMapJoinVariation != VectorMapJoinVariation.OUTER); + mapJoinDesc.setNoOuterJoin( + testDesc.vectorMapJoinVariation != VectorMapJoinVariation.OUTER && + testDesc.vectorMapJoinVariation != VectorMapJoinVariation.FULL_OUTER); Map> filterMap = new HashMap>(); filterMap.put((byte) 0, new ArrayList()); // None. mapJoinDesc.setFilters(filterMap); List bigTableRetainColumnNumsList = intArrayToList(testDesc.bigTableRetainColumnNums); - - // For now, just small table values... - List smallTableRetainColumnNumsList = intArrayToList(testDesc.smallTableRetainValueColumnNums); - Map> retainListMap = new HashMap>(); retainListMap.put((byte) 0, bigTableRetainColumnNumsList); - retainListMap.put((byte) 1, smallTableRetainColumnNumsList); + + // For now, just small table keys/values... + if (testDesc.smallTableRetainKeyColumnNums.length == 0) { + + // Just the value column numbers to retain. 
+ List smallTableValueRetainColumnNumsList = + intArrayToList(testDesc.smallTableRetainValueColumnNums); + + retainListMap.put((byte) 1, smallTableValueRetainColumnNumsList); + } else { + + // Both the key and value column numbers. + + // Zero and above numbers indicate a big table key is needed for + // the small table result "area". + + // Negative numbers indicate a column to be read (deserialized) from the small table's + // LazyBinary value row. + + ArrayList smallTableValueIndicesNumsList = new ArrayList(); + for (int i = 0; i < testDesc.smallTableRetainKeyColumnNums.length; i++) { + smallTableValueIndicesNumsList.add(testDesc.smallTableRetainKeyColumnNums[i]); + } + for (int i = 0; i < testDesc.smallTableRetainValueColumnNums.length; i++) { + smallTableValueIndicesNumsList.add(-testDesc.smallTableRetainValueColumnNums[i] - 1); + } + int[] smallTableValueIndicesNums = + ArrayUtils.toPrimitive(smallTableValueIndicesNumsList.toArray(new Integer[0])); + + Map valueIndicesMap = new HashMap(); + valueIndicesMap.put((byte) 1, smallTableValueIndicesNums); + mapJoinDesc.setValueIndices(valueIndicesMap); + } mapJoinDesc.setRetainList(retainListMap); + switch (testDesc.mapJoinPlanVariation) { + case DYNAMIC_PARTITION_HASH_JOIN: + // FULL OUTER behaves differently for dynamic partition hash join. + mapJoinDesc.setDynamicPartitionHashJoin(true); + break; + case SHARED_SMALL_TABLE: + mapJoinDesc.setDynamicPartitionHashJoin(false); + break; + default: + throw new RuntimeException( + "Unexpected map join plan variation " + testDesc.mapJoinPlanVariation); + } + int joinDescType; switch (testDesc.vectorMapJoinVariation) { case INNER: @@ -141,6 +320,9 @@ public static MapJoinDesc createMapJoinDesc(MapJoinTestDescription testDesc) { case OUTER: joinDescType = JoinDesc.LEFT_OUTER_JOIN; break; + case FULL_OUTER: + joinDescType = JoinDesc.FULL_OUTER_JOIN; + break; default: throw new RuntimeException("unknown operator variation " + testDesc.vectorMapJoinVariation); } @@ -149,12 +331,25 @@ public static MapJoinDesc createMapJoinDesc(MapJoinTestDescription testDesc) { mapJoinDesc.setConds(conds); TableDesc keyTableDesc = PlanUtils.getMapJoinKeyTableDesc(testDesc.hiveConf, PlanUtils - .getFieldSchemasFromColumnList(keyExpr, "")); + .getFieldSchemasFromColumnList(smallTableKeyExpr, "")); mapJoinDesc.setKeyTblDesc(keyTableDesc); + // Small Table value column expressions. + List smallTableValueExpr = new ArrayList(); + + // All Small Table keys and values. + for (int i = 0; i < testDesc.smallTableValueColumnNames.length; i++) { + smallTableValueExpr.add( + new ExprNodeColumnDesc( + testDesc.smallTableValueTypeInfos[i], + testDesc.smallTableValueColumnNames[i], "S", false)); + } + TableDesc valueTableDesc = PlanUtils.getMapJoinValueTableDesc( - PlanUtils.getFieldSchemasFromColumnList(smallTableExpr, "")); + PlanUtils.getFieldSchemasFromColumnList(smallTableValueExpr, "")); ArrayList valueTableDescsList = new ArrayList(); + + // Big Table entry, then Small Table entry. 
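// Aside: a hedged sketch of the valueIndices encoding constructed above; the
// numbers are assumed examples and this class is not part of the patch. An
// entry >= 0 names a Big Table key column to project into the small-table
// result area; an entry < 0 encodes small-table value column v as (-v - 1),
// meaning it must be deserialized from the small table's LazyBinary value row.
class ValueIndexSketch {
    static int encodeValueColumn(int v) { return -v - 1; }
    static boolean isBigTableKey(int encoded) { return encoded >= 0; }
    static int decodeValueColumn(int encoded) { return -encoded - 1; }

    public static void main(String[] args) {
        int[] valueIndices = { 0, 1, encodeValueColumn(0), encodeValueColumn(2) };
        for (int encoded : valueIndices) {
            System.out.println(isBigTableKey(encoded)
                ? "big table key column " + encoded
                : "small table value column " + decodeValueColumn(encoded));
        }
    }
}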
valueTableDescsList.add(null); valueTableDescsList.add(valueTableDesc); mapJoinDesc.setValueTblDescs(valueTableDescsList); @@ -180,6 +375,7 @@ public static VectorMapJoinDesc createVectorMapJoinDesc(MapJoinTestDescription t hashTableKind = HashTableKind.HASH_SET; break; case OUTER: + case FULL_OUTER: hashTableKind = HashTableKind.HASH_MAP; break; default: @@ -190,9 +386,17 @@ public static VectorMapJoinDesc createVectorMapJoinDesc(MapJoinTestDescription t if (testDesc.bigTableKeyTypeInfos.length == 1) { switch (((PrimitiveTypeInfo) testDesc.bigTableKeyTypeInfos[0]).getPrimitiveCategory()) { case BOOLEAN: + hashTableKeyType = HashTableKeyType.BOOLEAN; + break; case BYTE: + hashTableKeyType = HashTableKeyType.BYTE; + break; case SHORT: + hashTableKeyType = HashTableKeyType.SHORT; + break; case INT: + hashTableKeyType = HashTableKeyType.INT; + break; case LONG: hashTableKeyType = HashTableKeyType.LONG; break; @@ -216,49 +420,112 @@ public static VectorMapJoinDesc createVectorMapJoinDesc(MapJoinTestDescription t vectorDesc.setAllBigTableKeyExpressions(null); - vectorMapJoinInfo.setBigTableValueColumnMap(new int[0]); - vectorMapJoinInfo.setBigTableValueColumnNames(new String[0]); - vectorMapJoinInfo.setBigTableValueTypeInfos(new TypeInfo[0]); + vectorMapJoinInfo.setBigTableValueColumnMap(testDesc.bigTableColumnNums); + vectorMapJoinInfo.setBigTableValueColumnNames(testDesc.bigTableColumnNames); + vectorMapJoinInfo.setBigTableValueTypeInfos(testDesc.bigTableTypeInfos); vectorMapJoinInfo.setSlimmedBigTableValueExpressions(null); vectorDesc.setAllBigTableValueExpressions(null); + vectorMapJoinInfo.setBigTableFilterExpressions(new VectorExpression[0]); + + + /* + * Column mapping. + */ + VectorColumnOutputMapping bigTableRetainMapping = + new VectorColumnOutputMapping("Big Table Retain Mapping"); + + VectorColumnOutputMapping nonOuterSmallTableKeyMapping = + new VectorColumnOutputMapping("Non Outer Small Table Key Mapping"); + + VectorColumnOutputMapping outerSmallTableKeyMapping = + new VectorColumnOutputMapping("Outer Small Table Key Mapping"); + + VectorColumnSourceMapping fullOuterSmallTableKeyMapping = + new VectorColumnSourceMapping("Full Outer Small Table Key Mapping"); + VectorColumnSourceMapping projectionMapping = new VectorColumnSourceMapping("Projection Mapping"); + int nextOutputColumn = 0; - VectorColumnOutputMapping bigTableRetainedMapping = - new VectorColumnOutputMapping("Big Table Retained Mapping"); - for (int i = 0; i < testDesc.bigTableTypeInfos.length; i++) { - bigTableRetainedMapping.add(i, i, testDesc.bigTableTypeInfos[i]); - projectionMapping.add(i, i, testDesc.bigTableKeyTypeInfos[i]); + final int bigTableRetainedSize = testDesc.bigTableRetainColumnNums.length; + for (int i = 0; i < bigTableRetainedSize; i++) { + final int batchColumnIndex = testDesc.bigTableRetainColumnNums[i]; + TypeInfo typeInfo = testDesc.bigTableTypeInfos[i]; + projectionMapping.add( + nextOutputColumn, batchColumnIndex, typeInfo); + // Collect columns we copy from the big table batch to the overflow batch. + if (!bigTableRetainMapping.containsOutputColumn(batchColumnIndex)) { + + // Tolerate repeated use of a big table column. 
+ bigTableRetainMapping.add(batchColumnIndex, batchColumnIndex, typeInfo); + } + nextOutputColumn++; } - VectorColumnOutputMapping bigTableOuterKeyMapping = - new VectorColumnOutputMapping("Big Table Outer Key Mapping"); + boolean isOuterJoin = + (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER || + testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER); + + int emulateScratchColumn = testDesc.bigTableTypeInfos.length; + + VectorColumnOutputMapping smallTableKeyOutputMapping = + new VectorColumnOutputMapping("Small Table Key Output Mapping"); + final int smallTableKeyRetainSize = testDesc.smallTableRetainKeyColumnNums.length; + for (int i = 0; i < testDesc.smallTableRetainKeyColumnNums.length; i++) { + final int smallTableKeyColumnNum = testDesc.smallTableRetainKeyColumnNums[i]; + final int bigTableKeyColumnNum = testDesc.bigTableKeyColumnNums[smallTableKeyColumnNum]; + TypeInfo keyTypeInfo = testDesc.smallTableKeyTypeInfos[smallTableKeyColumnNum]; + if (!isOuterJoin) { + // Project the big table key into the small table result "area". + projectionMapping.add(nextOutputColumn, bigTableKeyColumnNum, keyTypeInfo); + if (!bigTableRetainMapping.containsOutputColumn(bigTableKeyColumnNum)) { + nonOuterSmallTableKeyMapping.add(bigTableKeyColumnNum, bigTableKeyColumnNum, keyTypeInfo); + } + } else { + outerSmallTableKeyMapping.add(bigTableKeyColumnNum, emulateScratchColumn, keyTypeInfo); + projectionMapping.add(nextOutputColumn, emulateScratchColumn, keyTypeInfo); + + // For FULL OUTER MapJoin, we need to be able to deserialize a Small Table key + // into the output result. + fullOuterSmallTableKeyMapping.add(smallTableKeyColumnNum, emulateScratchColumn, keyTypeInfo); + emulateScratchColumn++; + } + nextOutputColumn++; + } // The order of the fields in the LazyBinary small table value must be used, so // we use the source ordering flavor for the mapping. - VectorColumnSourceMapping smallTableMapping = - new VectorColumnSourceMapping("Small Table Mapping"); - int outputColumn = testDesc.bigTableTypeInfos.length; + VectorColumnSourceMapping smallTableValueMapping = + new VectorColumnSourceMapping("Small Table Value Mapping"); for (int i = 0; i < testDesc.smallTableValueTypeInfos.length; i++) { - smallTableMapping.add(i, outputColumn, testDesc.smallTableValueTypeInfos[i]); - projectionMapping.add(outputColumn, outputColumn, testDesc.smallTableValueTypeInfos[i]); - outputColumn++; + smallTableValueMapping.add(i, emulateScratchColumn, testDesc.smallTableValueTypeInfos[i]); + projectionMapping.add(nextOutputColumn, emulateScratchColumn, testDesc.smallTableValueTypeInfos[i]); + emulateScratchColumn++; + nextOutputColumn++; } // Convert dynamic arrays and maps to simple arrays. 
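// Aside: an assumed, simplified walkthrough (not patch code) of how the loops
// above lay out the output batch: big-table retained columns keep their batch
// positions, while outer small-table keys and small-table values land in
// emulated scratch columns appended after the big-table columns.
class ScratchColumnLayoutSketch {
    public static void main(String[] args) {
        final int bigTableColumnCount = 2;      // assumed big table width
        final int outerSmallTableKeyCount = 1;  // assumed retained keys (outer join)
        final int smallTableValueCount = 1;     // assumed small table values
        int nextOutputColumn = 0;
        int emulateScratchColumn = bigTableColumnCount; // scratch starts after big table
        for (int i = 0; i < bigTableColumnCount; i++) {
            System.out.println("output " + nextOutputColumn++ + " <- big table batch column " + i);
        }
        for (int i = 0; i < outerSmallTableKeyCount; i++) {
            System.out.println("output " + nextOutputColumn++
                + " <- scratch column " + emulateScratchColumn++ + " (outer small table key)");
        }
        for (int i = 0; i < smallTableValueCount; i++) {
            System.out.println("output " + nextOutputColumn++
                + " <- scratch column " + emulateScratchColumn++ + " (small table value)");
        }
    }
}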
- bigTableRetainedMapping.finalize(); + bigTableRetainMapping.finalize(); + vectorMapJoinInfo.setBigTableRetainColumnMap(bigTableRetainMapping.getOutputColumns()); + vectorMapJoinInfo.setBigTableRetainTypeInfos(bigTableRetainMapping.getTypeInfos()); + + nonOuterSmallTableKeyMapping.finalize(); + vectorMapJoinInfo.setNonOuterSmallTableKeyColumnMap(nonOuterSmallTableKeyMapping.getOutputColumns()); + vectorMapJoinInfo.setNonOuterSmallTableKeyTypeInfos(nonOuterSmallTableKeyMapping.getTypeInfos()); - bigTableOuterKeyMapping.finalize(); + outerSmallTableKeyMapping.finalize(); + fullOuterSmallTableKeyMapping.finalize(); - smallTableMapping.finalize(); + vectorMapJoinInfo.setOuterSmallTableKeyMapping(outerSmallTableKeyMapping); + vectorMapJoinInfo.setFullOuterSmallTableKeyMapping(fullOuterSmallTableKeyMapping); - vectorMapJoinInfo.setBigTableRetainedMapping(bigTableRetainedMapping); - vectorMapJoinInfo.setBigTableOuterKeyMapping(bigTableOuterKeyMapping); - vectorMapJoinInfo.setSmallTableMapping(smallTableMapping); + smallTableValueMapping.finalize(); + + vectorMapJoinInfo.setSmallTableValueMapping(smallTableValueMapping); projectionMapping.finalize(); @@ -267,7 +534,9 @@ public static VectorMapJoinDesc createVectorMapJoinDesc(MapJoinTestDescription t vectorMapJoinInfo.setProjectionMapping(projectionMapping); - assert projectionMapping.getCount() == testDesc.outputColumnNames.length; + if (projectionMapping.getCount() != testDesc.outputColumnNames.length) { + throw new RuntimeException(); + } vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo); @@ -306,6 +575,17 @@ public static VectorMapJoinCommonOperator createNativeVectorMapJoinOperator( new VectorMapJoinOuterLongOperator(new CompilationOpContext(), mapJoinDesc, vContext, vectorDesc); break; + case FULL_OUTER: + if (mapJoinDesc.isFullOuterIntersect()) { + operator = + new VectorMapJoinFullOuterIntersectLongOperator(new CompilationOpContext(), + mapJoinDesc, vContext, vectorDesc); + } else { + operator = + new VectorMapJoinFullOuterLongOperator(new CompilationOpContext(), + mapJoinDesc, vContext, vectorDesc); + } + break; default: throw new RuntimeException("unknown operator variation " + VectorMapJoinVariation); } @@ -331,6 +611,17 @@ public static VectorMapJoinCommonOperator createNativeVectorMapJoinOperator( operator = new VectorMapJoinOuterStringOperator(new CompilationOpContext(), mapJoinDesc, vContext, vectorDesc); + break; + case FULL_OUTER: + if (mapJoinDesc.isFullOuterIntersect()) { + operator = + new VectorMapJoinFullOuterIntersectStringOperator(new CompilationOpContext(), + mapJoinDesc, vContext, vectorDesc); + } else { + operator = + new VectorMapJoinFullOuterStringOperator(new CompilationOpContext(), + mapJoinDesc, vContext, vectorDesc); + } break; default: throw new RuntimeException("unknown operator variation " + VectorMapJoinVariation); @@ -358,6 +648,17 @@ public static VectorMapJoinCommonOperator createNativeVectorMapJoinOperator( new VectorMapJoinOuterMultiKeyOperator(new CompilationOpContext(), mapJoinDesc, vContext, vectorDesc); break; + case FULL_OUTER: + if (mapJoinDesc.isFullOuterIntersect()) { + operator = + new VectorMapJoinFullOuterIntersectMultiKeyOperator(new CompilationOpContext(), + mapJoinDesc, vContext, vectorDesc); + } else { + operator = + new VectorMapJoinFullOuterMultiKeyOperator(new CompilationOpContext(), + mapJoinDesc, vContext, vectorDesc); + } + break; default: throw new RuntimeException("unknown operator variation " + VectorMapJoinVariation); } @@ -365,16 +666,31 @@ public static VectorMapJoinCommonOperator 
createNativeVectorMapJoinOperator( default: throw new RuntimeException("Unknown hash table key type " + vectorDesc.getHashTableKeyType()); } + System.out.println("*BENCHMARK* createNativeVectorMapJoinOperator " + + operator.getClass().getSimpleName()); return operator; } public static VectorizationContext createVectorizationContext(MapJoinTestDescription testDesc) throws HiveException { VectorizationContext vContext = - new VectorizationContext("test", testDesc.bigTableColumnNamesList); + new VectorizationContext("test", testDesc.bigTableColumnNameList); + + boolean isOuterJoin = + (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER || + testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER); + + if (isOuterJoin) { + + // We need physical columns. + for (int i = 0; i < testDesc.smallTableRetainKeyColumnNums.length; i++) { + final int smallTableKeyRetainColumnNum = testDesc.smallTableRetainKeyColumnNums[i]; + vContext.allocateScratchColumn(testDesc.smallTableKeyTypeInfos[smallTableKeyRetainColumnNum]); + } + } // Create scratch columns to hold small table results. - for (int i = 0; i < testDesc.smallTableValueTypeInfos.length; i++) { + for (int i = 0; i < testDesc.smallTableRetainValueColumnNums.length; i++) { vContext.allocateScratchColumn(testDesc.smallTableValueTypeInfos[i]); } return vContext; @@ -390,19 +706,19 @@ public static MapJoinTableContainerSerDe createMapJoinTableContainerSerDe(MapJoi final Byte smallTablePos = 1; - // UNDONE: Why do we need to specify BinarySortableSerDe explicitly here??? TableDesc keyTableDesc = mapJoinDesc.getKeyTblDesc(); AbstractSerDe keySerializer = (AbstractSerDe) ReflectionUtil.newInstance( BinarySortableSerDe.class, null); SerDeUtils.initializeSerDe(keySerializer, null, keyTableDesc.getProperties(), null); MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerializer, false); - TableDesc valueTableDesc; + final List valueTableDescList; if (mapJoinDesc.getNoOuterJoin()) { - valueTableDesc = mapJoinDesc.getValueTblDescs().get(smallTablePos); + valueTableDescList = mapJoinDesc.getValueTblDescs(); } else { - valueTableDesc = mapJoinDesc.getValueFilteredTblDescs().get(smallTablePos); + valueTableDescList = mapJoinDesc.getValueFilteredTblDescs(); } + TableDesc valueTableDesc = valueTableDescList.get(smallTablePos); AbstractSerDe valueSerDe = (AbstractSerDe) ReflectionUtil.newInstance( valueTableDesc.getDeserializerClass(), null); SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null); @@ -414,16 +730,19 @@ public static MapJoinTableContainerSerDe createMapJoinTableContainerSerDe(MapJoi } public static void connectOperators( - MapJoinTestDescription testDesc, Operator operator, - Operator testCollectorOperator) throws HiveException { - Operator[] parents = new Operator[] {operator}; - testCollectorOperator.setParentOperators(Arrays.asList(parents)); - Operator[] childOperators = new Operator[] {testCollectorOperator}; - operator.setChildOperators(Arrays.asList(childOperators)); - HiveConf.setBoolVar(testDesc.hiveConf, - HiveConf.ConfVars.HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD, true); - operator.initialize(testDesc.hiveConf, testDesc.inputObjectInspectors); + Operator childOperator) throws HiveException { + + List> newParentOperators = newOperatorList(); + newParentOperators.addAll(childOperator.getParentOperators()); + newParentOperators.add(operator); + childOperator.setParentOperators(newParentOperators); + + List> newChildOperators = newOperatorList(); + 
newChildOperators.addAll(operator.getChildOperators()); + newChildOperators.add(childOperator); + operator.setChildOperators(newChildOperators); + } private static List intArrayToList(int[] intArray) { @@ -509,9 +828,25 @@ private static void loadTableContainerData(MapJoinTestDescription testDesc, MapJ mapJoinTableContainer.seal(); } - public static MapJoinOperator createMapJoin(MapJoinTestDescription testDesc, - Operator collectorOperator, MapJoinTestData testData, - MapJoinDesc mapJoinDesc, boolean isVectorMapJoin, boolean isOriginalMapJoin) + public static class CreateMapJoinResult { + public final MapJoinOperator mapJoinOperator; + public final MapJoinTableContainer mapJoinTableContainer; + public final MapJoinTableContainerSerDe mapJoinTableContainerSerDe; + + public CreateMapJoinResult( + MapJoinOperator mapJoinOperator, + MapJoinTableContainer mapJoinTableContainer, + MapJoinTableContainerSerDe mapJoinTableContainerSerDe) { + this.mapJoinOperator = mapJoinOperator; + this.mapJoinTableContainer = mapJoinTableContainer; + this.mapJoinTableContainerSerDe = mapJoinTableContainerSerDe; + } + } + public static CreateMapJoinResult createMapJoin( + MapJoinTestDescription testDesc, + MapJoinTestData testData, + MapJoinDesc mapJoinDesc, boolean isVectorMapJoin, boolean isOriginalMapJoin, + MapJoinTableContainer shareMapJoinTableContainer) throws SerDeException, IOException, HiveException { final Byte bigTablePos = 0; @@ -539,11 +874,16 @@ public static MapJoinOperator createMapJoin(MapJoinTestDescription testDesc, operator = new MapJoinOperator(new CompilationOpContext()); operator.setConf(mapJoinDesc); } else { - VectorizationContext vContext = new VectorizationContext("test", testDesc.bigTableColumnNamesList); + VectorizationContext vContext = + new VectorizationContext("test", testDesc.bigTableColumnNameList); + + /* + // UNDONE: Unclear this belongs in the input VectorizationContext... // Create scratch columns to hold small table results. for (int i = 0; i < testDesc.smallTableValueTypeInfos.length; i++) { vContext.allocateScratchColumn(testDesc.smallTableValueTypeInfos[i]); } + */ // This is what the Vectorizer class does. 
VectorMapJoinDesc vectorMapJoinDesc = new VectorMapJoinDesc(); @@ -571,21 +911,20 @@ public static MapJoinOperator createMapJoin(MapJoinTestDescription testDesc, } } - MapJoinTestConfig.connectOperators(testDesc, operator, collectorOperator); - - operator.setTestMapJoinTableContainer(1, mapJoinTableContainer, mapJoinTableContainerSerDe); + HiveConf.setBoolVar(testDesc.hiveConf, + HiveConf.ConfVars.HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD, true); - return operator; + return new CreateMapJoinResult(operator, mapJoinTableContainer, mapJoinTableContainerSerDe); } - public static MapJoinOperator createNativeVectorMapJoin(MapJoinTestDescription testDesc, - Operator collectorOperator, MapJoinTestData testData, - MapJoinDesc mapJoinDesc, HashTableImplementationType hashTableImplementationType) + public static CreateMapJoinResult createNativeVectorMapJoin( + MapJoinTestDescription testDesc, + MapJoinTestData testData, + MapJoinDesc mapJoinDesc, HashTableImplementationType hashTableImplementationType, + MapJoinTableContainer shareMapJoinTableContainer) throws SerDeException, IOException, HiveException { VectorMapJoinDesc vectorDesc = MapJoinTestConfig.createVectorMapJoinDesc(testDesc); - - // UNDONE mapJoinDesc.setVectorDesc(vectorDesc); vectorDesc.setHashTableImplementationType(hashTableImplementationType); @@ -593,13 +932,14 @@ public static MapJoinOperator createNativeVectorMapJoin(MapJoinTestDescription t VectorMapJoinInfo vectorMapJoinInfo = vectorDesc.getVectorMapJoinInfo(); MapJoinTableContainer mapJoinTableContainer; + MapJoinTableContainerSerDe mapJoinTableContainerSerDe = null; switch (vectorDesc.getHashTableImplementationType()) { case OPTIMIZED: mapJoinTableContainer = new MapJoinBytesTableContainer( testDesc.hiveConf, null, testData.smallTableKeyHashMap.size(), 0); - MapJoinTableContainerSerDe mapJoinTableContainerSerDe = + mapJoinTableContainerSerDe = MapJoinTestConfig.createMapJoinTableContainerSerDe(mapJoinDesc); mapJoinTableContainer.setSerde( @@ -615,7 +955,11 @@ public static MapJoinOperator createNativeVectorMapJoin(MapJoinTestDescription t throw new RuntimeException("Unexpected hash table implementation type " + vectorDesc.getHashTableImplementationType()); } - loadTableContainerData(testDesc, testData, mapJoinTableContainer); +// if (shareMapJoinTableContainer == null) { + loadTableContainerData(testDesc, testData, mapJoinTableContainer); +// } else { +// setTableContainerData(mapJoinTableContainer, shareMapJoinTableContainer); +// } VectorizationContext vContext = MapJoinTestConfig.createVectorizationContext(testDesc); @@ -636,56 +980,295 @@ public static MapJoinOperator createNativeVectorMapJoin(MapJoinTestDescription t vectorDesc, vContext); - MapJoinTestConfig.connectOperators(testDesc, operator, collectorOperator); + HiveConf.setBoolVar(testDesc.hiveConf, + HiveConf.ConfVars.HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD, true); - operator.setTestMapJoinTableContainer(1, mapJoinTableContainer, null); + return new CreateMapJoinResult(operator, mapJoinTableContainer, mapJoinTableContainerSerDe); + } - return operator; + public static CreateMapJoinResult createMapJoinImplementation( + MapJoinTestImplementation mapJoinImplementation, + MapJoinTestDescription testDesc, + MapJoinTestData testData, + MapJoinDesc mapJoinDesc) + throws SerDeException, IOException, HiveException { + return createMapJoinImplementation( + mapJoinImplementation, testDesc, testData, mapJoinDesc, null); } - public static MapJoinOperator createMapJoinImplementation(MapJoinTestImplementation mapJoinImplementation, 
+ public static CreateMapJoinResult createMapJoinImplementation( + MapJoinTestImplementation mapJoinImplementation, MapJoinTestDescription testDesc, - Operator testCollectorOperator, MapJoinTestData testData, - MapJoinDesc mapJoinDesc) throws SerDeException, IOException, HiveException { + MapJoinTestData testData, + MapJoinDesc mapJoinDesc, + MapJoinTableContainer shareMapJoinTableContainer) + throws SerDeException, IOException, HiveException { - MapJoinOperator operator; + CreateMapJoinResult result; switch (mapJoinImplementation) { case ROW_MODE_HASH_MAP: // MapJoinOperator - operator = MapJoinTestConfig.createMapJoin( - testDesc, testCollectorOperator, testData, mapJoinDesc, /* isVectorMapJoin */ false, - /* isOriginalMapJoin */ true); + result = MapJoinTestConfig.createMapJoin( + testDesc, testData, mapJoinDesc, /* isVectorMapJoin */ false, + /* isOriginalMapJoin */ true, + shareMapJoinTableContainer); break; case ROW_MODE_OPTIMIZED: // MapJoinOperator - operator = MapJoinTestConfig.createMapJoin( - testDesc, testCollectorOperator, testData, mapJoinDesc, /* isVectorMapJoin */ false, - /* isOriginalMapJoin */ false); + result = MapJoinTestConfig.createMapJoin( + testDesc, testData, mapJoinDesc, /* isVectorMapJoin */ false, + /* isOriginalMapJoin */ false, + shareMapJoinTableContainer); break; case VECTOR_PASS_THROUGH: // VectorMapJoinOperator - operator = MapJoinTestConfig.createMapJoin( - testDesc, testCollectorOperator, testData, mapJoinDesc, /* isVectorMapJoin */ true, - /* n/a */ false); + result = MapJoinTestConfig.createMapJoin( + testDesc, testData, mapJoinDesc, /* isVectorMapJoin */ true, + /* n/a */ false, + shareMapJoinTableContainer); break; case NATIVE_VECTOR_OPTIMIZED: - operator = MapJoinTestConfig.createNativeVectorMapJoin( - testDesc, testCollectorOperator, testData, mapJoinDesc, HashTableImplementationType.OPTIMIZED); + result = MapJoinTestConfig.createNativeVectorMapJoin( + testDesc, testData, mapJoinDesc, + HashTableImplementationType.OPTIMIZED, + shareMapJoinTableContainer); break; case NATIVE_VECTOR_FAST: - operator = MapJoinTestConfig.createNativeVectorMapJoin( - testDesc, testCollectorOperator, testData, mapJoinDesc, HashTableImplementationType.FAST); + result = MapJoinTestConfig.createNativeVectorMapJoin( + testDesc, testData, mapJoinDesc, + HashTableImplementationType.FAST, + shareMapJoinTableContainer); break; default: throw new RuntimeException("Unexpected MapJoin Operator Implementation " + mapJoinImplementation); } - return operator; + return result; + } + + private static Operator makeInterceptSelectOperator( + MapJoinOperator mapJoinOperator, int bigTableKeySize, int bigTableRetainSize, + String[] outputColumnNames, TypeInfo[] outputTypeInfos) { + + MapJoinDesc mapJoinDesc = (MapJoinDesc) mapJoinOperator.getConf(); + + List selectExprList = new ArrayList(); + List selectOutputColumnNameList = new ArrayList(); + for (int i = 0; i < bigTableRetainSize; i++) { + String selectOutputColumnName = "_col" + i; + selectOutputColumnNameList.add(selectOutputColumnName); + + TypeInfo outputTypeInfo = outputTypeInfos[i]; + if (i < bigTableKeySize) { + + // Big Table key. + ExprNodeColumnDesc keyColumnExpr = + new ExprNodeColumnDesc( + outputTypeInfo, + outputColumnNames[i], "test", false); + selectExprList.add(keyColumnExpr); + } else { + + // For row-mode, substitute NULL constant for any non-key extra Big Table columns. 
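// Aside: a tiny sketch (assumed row shape, not patch code) of what the
// intercept SELECT built here computes per row: key columns pass through
// unchanged and every non-key column is replaced by a NULL constant, since
// the FULL OUTER intercept only examines keys.
class NullNonKeySelectSketch {
    static Object[] project(Object[] row, int keyCount) {
        Object[] out = new Object[row.length];
        for (int c = 0; c < row.length; c++) {
            out[c] = (c < keyCount) ? row[c] : null; // non-keys become NULL
        }
        return out;
    }
    public static void main(String[] args) {
        Object[] row = { 42L, "extra1", "extra2" };
        System.out.println(java.util.Arrays.toString(project(row, 1))); // [42, null, null]
    }
}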
+ ExprNodeConstantDesc nullExtraColumnExpr = + new ExprNodeConstantDesc( + outputTypeInfo, + null); + nullExtraColumnExpr.setFoldedFromCol(outputColumnNames[i]); + selectExprList.add(nullExtraColumnExpr); + } + } + + SelectDesc selectDesc = new SelectDesc(selectExprList, selectOutputColumnNameList); + Operator selectOperator = + OperatorFactory.get(new CompilationOpContext(), selectDesc); + + return selectOperator; + } + + private static Operator vectorizeInterceptSelectOperator( + MapJoinOperator mapJoinOperator, int bigTableKeySize, int bigTableRetainSize, + Operator selectOperator) throws HiveException{ + + MapJoinDesc mapJoinDesc = (MapJoinDesc) mapJoinOperator.getConf(); + + VectorizationContext vOutContext = + ((VectorizationContextRegion) mapJoinOperator).getOutputVectorizationContext(); + + SelectDesc selectDesc = (SelectDesc) selectOperator.getConf(); + List selectExprs = selectDesc.getColList(); + + VectorExpression[] selectVectorExpr = new VectorExpression[bigTableRetainSize]; + for (int i = 0; i < bigTableRetainSize; i++) { + + TypeInfo typeInfo = selectExprs.get(i).getTypeInfo(); + if (i < bigTableKeySize) { + + // Big Table key. + selectVectorExpr[i] = vOutContext.getVectorExpression(selectExprs.get(i)); + } else { + + // For vector-mode, for test purposes we substitute a NO-OP (we don't want to modify + // the batch). + + // FULL OUTER INTERCEPT does not look at non-key columns. + + NoOpExpression noOpExpression = new NoOpExpression(i); + + noOpExpression.setInputTypeInfos(typeInfo); + noOpExpression.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE); + + noOpExpression.setOutputTypeInfo(typeInfo); + noOpExpression.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE); + + selectVectorExpr[i] = noOpExpression; + } + } + + System.out.println("*BENCHMARK* VectorSelectOperator selectVectorExpr " + + Arrays.toString(selectVectorExpr)); + + int[] projectedColumns = + ArrayUtils.toPrimitive( + vOutContext.getProjectedColumns().subList(0, bigTableRetainSize). + toArray(new Integer[0])); + System.out.println("*BENCHMARK* VectorSelectOperator projectedColumns " + + Arrays.toString(projectedColumns)); + + VectorSelectDesc vectorSelectDesc = new VectorSelectDesc(); + vectorSelectDesc.setSelectExpressions(selectVectorExpr); + vectorSelectDesc.setProjectedOutputColumns(projectedColumns); + + Operator vectorSelectOperator = OperatorFactory.getVectorOperator( + selectOperator.getCompilationOpContext(), selectDesc, + vOutContext, vectorSelectDesc); + + return vectorSelectOperator; + } + + public static CountCollectorTestOperator addFullOuterIntercept( + MapJoinTestImplementation mapJoinImplementation, + MapJoinTestDescription testDesc, + RowTestObjectsMultiSet outputTestRowMultiSet, MapJoinTestData testData, + MapJoinOperator mapJoinOperator, MapJoinTableContainer mapJoinTableContainer, + MapJoinTableContainerSerDe mapJoinTableContainerSerDe) + throws SerDeException, IOException, HiveException { + + MapJoinDesc mapJoinDesc = (MapJoinDesc) mapJoinOperator.getConf(); + + // For FULL OUTER MapJoin, we require all Big Keys to be present in the output result. + // The first N output columns are the Big Table key columns. 
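// Aside: a minimal, row-level sketch (assumed key sets; not patch code) of the
// kind of completeness property this intercept machinery exercises: a correct
// FULL OUTER join output contains every small-table key, matched or not, so
// subtracting the output keys from the small-table keys must leave nothing.
class FullOuterInterceptSketch {
    static java.util.Set<Long> missingSmallKeys(
            java.util.Set<Long> fullOuterOutputKeys, java.util.Set<Long> smallTableKeys) {
        java.util.Set<Long> missing = new java.util.HashSet<>(smallTableKeys);
        missing.removeAll(fullOuterOutputKeys);
        return missing; // expected to be empty for a correct FULL OUTER MapJoin
    }
    public static void main(String[] args) {
        java.util.Set<Long> output = new java.util.HashSet<>(java.util.Arrays.asList(1L, 2L, 3L));
        java.util.Set<Long> small = new java.util.HashSet<>(java.util.Arrays.asList(2L, 3L));
        System.out.println(missingSmallKeys(output, small)); // []
    }
}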
+ Map> keyMap = mapJoinDesc.getKeys(); + List bigTableKeyExprs = keyMap.get((byte) 0); + final int bigTableKeySize = bigTableKeyExprs.size(); + + Map> retainMap = mapJoinDesc.getRetainList(); + List bigTableRetainList = retainMap.get((byte) 0); + final int bigTableRetainSize = bigTableRetainList.size(); + + List outputColumnNameList = mapJoinDesc.getOutputColumnNames(); + String[] mapJoinOutputColumnNames = outputColumnNameList.toArray(new String[0]); + + // Use a utility method to get the MapJoin output TypeInfo. + TypeInfo[] mapJoinOutputTypeInfos = VectorMapJoinBaseOperator.getOutputTypeInfos(mapJoinDesc); + + final boolean isVectorOutput = MapJoinTestConfig.isVectorOutput(mapJoinImplementation); + + /* + * Always create a row-mode SelectOperator. If we are vector-mode, next we will use its + * expressions and replace it with a VectorSelectOperator. + */ + Operator selectOperator = + makeInterceptSelectOperator( + mapJoinOperator, bigTableKeySize, bigTableRetainSize, + mapJoinOutputColumnNames, mapJoinOutputTypeInfos); + + List selectOutputColumnNameList = + ((SelectDesc) selectOperator.getConf()).getOutputColumnNames(); + String[] selectOutputColumnNames = + selectOutputColumnNameList.toArray(new String[0]); + + if (isVectorOutput) { + selectOperator = + vectorizeInterceptSelectOperator( + mapJoinOperator, bigTableKeySize, bigTableRetainSize, selectOperator); + } + + /* + * Create a test description just for the FULL OUTER INTERCEPT, with a different + * (projected) output column schema. + */ + MapJoinTestDescription interceptTestDesc = + new MapJoinTestDescription( + testDesc.hiveConf, testDesc.vectorMapJoinVariation, + selectOutputColumnNames, + Arrays.copyOf(mapJoinOutputTypeInfos, bigTableRetainSize), + testDesc.bigTableKeyColumnNums, + testDesc.smallTableValueTypeInfos, + testDesc.smallTableRetainKeyColumnNums, + testDesc.smallTableGenerationParameters, + testDesc.mapJoinPlanVariation); + + MapJoinDesc intersectMapJoinDesc = + createMapJoinDesc(interceptTestDesc, /* isFullOuterIntersect */ true); + + /* + * Create FULL OUTER INTERSECT MapJoin operator. + */ + CreateMapJoinResult interceptCreateMapJoinResult = + createMapJoinImplementation( + mapJoinImplementation, interceptTestDesc, testData, intersectMapJoinDesc); + MapJoinOperator intersectMapJoinOperator = + interceptCreateMapJoinResult.mapJoinOperator; + MapJoinTableContainer intersectMapJoinTableContainer = + interceptCreateMapJoinResult.mapJoinTableContainer; + MapJoinTableContainerSerDe interceptMapJoinTableContainerSerDe = + interceptCreateMapJoinResult.mapJoinTableContainerSerDe; + + connectOperators(mapJoinOperator, selectOperator); + + connectOperators(selectOperator, intersectMapJoinOperator); + + CountCollectorTestOperator interceptTestCollectorOperator; + if (!isVectorOutput) { + interceptTestCollectorOperator = + new TestMultiSetCollectorOperator( + interceptTestDesc.outputObjectInspectors, outputTestRowMultiSet); + } else { + VectorizationContext vContext = + ((VectorizationContextRegion) intersectMapJoinOperator).getOutputVectorizationContext(); + int[] intersectProjectionColumns = + ArrayUtils.toPrimitive(vContext.getProjectedColumns().toArray(new Integer[0])); + interceptTestCollectorOperator = + new TestMultiSetVectorCollectorOperator( + intersectProjectionColumns, + interceptTestDesc.outputTypeInfos, + interceptTestDesc.outputObjectInspectors, outputTestRowMultiSet); + } + + connectOperators(intersectMapJoinOperator, interceptTestCollectorOperator); + + // Set up the FULL OUTER INTERSECT MapJoin's inputObjInspector to include the Small Table, etc. 
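// Aside: a toy model (assumed classes, not Hive's Operator API) of the
// pipeline wired up above and of why initialize() is called only on the root:
//   mapJoinOperator -> selectOperator -> intersectMapJoinOperator -> collector
// Initialization cascades to the children, after which the test patches in
// the hash table containers, as the code below does.
class OperatorChainSketch {
    static class Op {
        final String name;
        final java.util.List<Op> children = new java.util.ArrayList<>();
        Op(String name) { this.name = name; }
        void connect(Op child) { children.add(child); }
        void initialize() {
            System.out.println("initialize " + name);
            for (Op child : children) {
                child.initialize();
            }
        }
    }
    public static void main(String[] args) {
        Op mapJoin = new Op("mapJoin");
        Op select = new Op("select");
        Op intersect = new Op("intersectMapJoin");
        Op collector = new Op("collector");
        mapJoin.connect(select);
        select.connect(intersect);
        intersect.connect(collector);
        mapJoin.initialize(); // cascades: mapJoin, select, intersectMapJoin, collector
    }
}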
+ intersectMapJoinOperator.setInputObjInspectors(interceptTestDesc.inputObjectInspectors);
+
+ // Now, invoke initializeOp methods from the root MapJoin operator.
+ mapJoinOperator.initialize(testDesc.hiveConf, testDesc.inputObjectInspectors);
+
+ // Fixup the mapJoinTables container references to our test data.
+ mapJoinOperator.setTestMapJoinTableContainer(
+ 1, mapJoinTableContainer, mapJoinTableContainerSerDe);
+ intersectMapJoinOperator.setTestMapJoinTableContainer(
+ 1, intersectMapJoinTableContainer, interceptMapJoinTableContainerSerDe);
+
+ return interceptTestCollectorOperator;
+ }
+
+ private static List<Operator<? extends OperatorDesc>> newOperatorList() {
+ return new ArrayList<Operator<? extends OperatorDesc>>();
+ }
}
\ No newline at end of file
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestData.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestData.java
index d763695..4994e9e 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestData.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestData.java
@@ -20,6 +20,7 @@
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Random;
 import java.util.Map.Entry;
@@ -37,6 +38,7 @@
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters.ValueOption;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
@@ -58,6 +60,8 @@
 HashMap<RowTestObjects, Integer> smallTableKeyHashMap;
+ List<RowTestObjects> fullOuterAdditionalSmallTableKeys;
+
 ArrayList<Integer> smallTableValueCounts;
 ArrayList<ArrayList<RowTestObjects>> smallTableValues;
@@ -68,83 +72,128 @@
 public MapJoinTestData(int rowCount, MapJoinTestDescription testDesc,
 this.smallTableRandomSeed = smallTableRandomSeed;
- generateTypes = generateTypesFromTypeInfos(testDesc.bigTableTypeInfos);
+ boolean isOuterJoin =
+ (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER ||
+ testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER);
+
+ generateTypes = generateTypesFromTypeInfos(
+ testDesc.bigTableTypeInfos,
+ testDesc.bigTableKeyColumnNums.length,
+ isOuterJoin);
 generator = new VectorBatchGenerator(generateTypes);
 bigTableBatch = generator.createBatch();
 // Add small table result columns.
- ColumnVector[] newCols = new ColumnVector[bigTableBatch.cols.length + testDesc.smallTableValueTypeInfos.length];
+
+ // Only [FULL] OUTER MapJoin needs physical columns for the retained Small Table keys.
+ final int smallTableRetainKeySize =
+ (isOuterJoin ?
testDesc.smallTableRetainKeyColumnNums.length : 0); + ColumnVector[] newCols = + new ColumnVector[ + bigTableBatch.cols.length + + smallTableRetainKeySize + + testDesc.smallTableValueTypeInfos.length]; System.arraycopy(bigTableBatch.cols, 0, newCols, 0, bigTableBatch.cols.length); + int colIndex = bigTableBatch.cols.length; + + if (isOuterJoin) { + for (int s = 0; s < smallTableRetainKeySize; s++) { + final int smallTableKeyColumnNum = testDesc.smallTableRetainKeyColumnNums[s]; + newCols[colIndex++] = + VectorizedBatchUtil.createColumnVector( + testDesc.smallTableKeyTypeInfos[smallTableKeyColumnNum]); + } + } for (int s = 0; s < testDesc.smallTableValueTypeInfos.length; s++) { - newCols[bigTableBatch.cols.length + s] = + newCols[colIndex++] = VectorizedBatchUtil.createColumnVector(testDesc.smallTableValueTypeInfos[s]); } bigTableBatch.cols = newCols; bigTableBatch.numCols = newCols.length; - + // This stream will be restarted with the same random seed over and over. bigTableBatchStream = new VectorBatchGenerateStream( bigTableRandomSeed, generator, rowCount); - VectorExtractRow vectorExtractRow = new VectorExtractRow(); - vectorExtractRow.init(testDesc.bigTableKeyTypeInfos); + VectorExtractRow keyVectorExtractRow = new VectorExtractRow(); + keyVectorExtractRow.init(testDesc.bigTableKeyTypeInfos, testDesc.bigTableKeyColumnNums); smallTableGenerationParameters = testDesc.getSmallTableGenerationParameters(); + HashMap bigTableKeyHashMap = new HashMap(); smallTableKeyHashMap = new HashMap(); + Random smallTableRandom = new Random(smallTableRandomSeed); // Start small table random generation // from beginning. ValueOption valueOption = smallTableGenerationParameters.getValueOption(); - int keyOutOfAThousand = smallTableGenerationParameters.getKeyOutOfAThousand(); + if (valueOption != ValueOption.NO_REGULAR_SMALL_KEYS) { + int keyOutOfAThousand = smallTableGenerationParameters.getKeyOutOfAThousand(); - bigTableBatchStream.reset(); - while (bigTableBatchStream.isNext()) { - bigTableBatch.reset(); - bigTableBatchStream.fillNext(bigTableBatch); + bigTableBatchStream.reset(); + while (bigTableBatchStream.isNext()) { + bigTableBatch.reset(); + bigTableBatchStream.fillNext(bigTableBatch); - final int size = bigTableBatch.size; - for (int i = 0; i < size; i++) { - - if (smallTableRandom.nextInt(1000) <= keyOutOfAThousand) { + final int size = bigTableBatch.size; + for (int i = 0; i < size; i++) { - RowTestObjects testKey = getTestKey(bigTableBatch, i, vectorExtractRow, + RowTestObjects testKey = getTestKey(bigTableBatch, i, keyVectorExtractRow, testDesc.bigTableKeyTypeInfos.length, testDesc.bigTableObjectInspectors); + bigTableKeyHashMap.put((RowTestObjects) testKey.clone(), -1); + + if (smallTableRandom.nextInt(1000) <= keyOutOfAThousand) { - if (valueOption == ValueOption.ONLY_ONE) { - if (smallTableKeyHashMap.containsKey(testKey)) { - continue; + if (valueOption == ValueOption.ONLY_ONE) { + if (smallTableKeyHashMap.containsKey(testKey)) { + continue; + } } + smallTableKeyHashMap.put((RowTestObjects) testKey.clone(), -1); } - smallTableKeyHashMap.put((RowTestObjects) testKey.clone(), -1); } } } //--------------------------------------------------------------------------------------------- - // UNDONE: For now, don't add more small keys... - /* - // Add more small table keys that are not in Big Table batches. 
- final int smallTableAdditionalLength = 1 + random.nextInt(4); - final int smallTableAdditionalSize = smallTableAdditionalLength * maxBatchSize; - VectorizedRowBatch[] smallTableAdditionalBatches = createBigTableBatches(generator, smallTableAdditionalLength); - for (int i = 0; i < smallTableAdditionalLength; i++) { - generator.generateBatch(smallTableAdditionalBatches[i], random, maxBatchSize); + // Add more small table keys that are not in Big Table or Small Table for FULL OUTER. + + fullOuterAdditionalSmallTableKeys = new ArrayList(); + + VectorBatchGenerateStream altBigTableBatchStream = + new VectorBatchGenerateStream( + smallTableRandomSeed, generator, 100); + altBigTableBatchStream.reset(); + while (altBigTableBatchStream.isNext()) { + bigTableBatch.reset(); + altBigTableBatchStream.fillNext(bigTableBatch); + final int size = bigTableBatch.size; + for (int i = 0; i < size; i++) { + RowTestObjects testKey = getTestKey(bigTableBatch, i, keyVectorExtractRow, + testDesc.bigTableKeyTypeInfos.length, + testDesc.bigTableObjectInspectors); + if (bigTableKeyHashMap.containsKey(testKey) || + smallTableKeyHashMap.containsKey(testKey)) { + continue; + } + RowTestObjects testKeyClone = (RowTestObjects) testKey.clone(); + smallTableKeyHashMap.put(testKeyClone, -1); + fullOuterAdditionalSmallTableKeys.add(testKeyClone); + } } - TestRow[] additionalTestKeys = getTestKeys(smallTableAdditionalBatches, vectorExtractRow, - testDesc.bigTableKeyTypeInfos.length, testDesc.bigTableObjectInspectors); - final int smallTableAdditionKeyProbes = smallTableAdditionalSize / 2; - for (int i = 0; i < smallTableAdditionKeyProbes; i++) { - int index = random.nextInt(smallTableAdditionalSize); - TestRow additionalTestKey = additionalTestKeys[index]; - smallTableKeyHashMap.put((TestRow) additionalTestKey.clone(), -1); + + // Make sure there is a NULL key. + Object[] nullKeyRowObjects = new Object[testDesc.bigTableKeyTypeInfos.length]; + RowTestObjects nullTestKey = new RowTestObjects(nullKeyRowObjects); + if (!smallTableKeyHashMap.containsKey(nullTestKey)) { + smallTableKeyHashMap.put(nullTestKey, -1); + fullOuterAdditionalSmallTableKeys.add(nullTestKey); } - */ // Number the test rows with collection order. int addCount = 0; @@ -177,9 +226,9 @@ public static void driveBigTableData(MapJoinTestDescription testDesc, MapJoinTes MapJoinOperator operator) throws HiveException { VectorExtractRow vectorExtractRow = new VectorExtractRow(); - vectorExtractRow.init(testDesc.bigTableKeyTypeInfos); + vectorExtractRow.init(testDesc.bigTableTypeInfos); - final int columnCount = testDesc.bigTableKeyTypeInfos.length; + final int columnCount = testDesc.bigTableTypeInfos.length; Object[] row = new Object[columnCount]; testData.bigTableBatchStream.reset(); @@ -194,7 +243,9 @@ public static void driveBigTableData(MapJoinTestDescription testDesc, MapJoinTes operator.process(row, 0); } } - operator.closeOp(false); + + // Close the operator tree. + operator.close(false); } public static void driveVectorBigTableData(MapJoinTestDescription testDesc, MapJoinTestData testData, @@ -207,7 +258,9 @@ public static void driveVectorBigTableData(MapJoinTestDescription testDesc, MapJ operator.process(testData.bigTableBatch, 0); } - operator.closeOp(false); + + // Close the operator tree. 
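+ // (Operator.close() also closes the child operators, so the downstream intercept
+ // SELECT, MapJoin, and collector get closed too; the previous closeOp() call only
+ // ran this operator's own cleanup.)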
+ operator.close(false); } public static void generateVariationData(MapJoinTestData testData, @@ -219,6 +272,7 @@ public static void generateVariationData(MapJoinTestData testData, break; case INNER: case OUTER: + case FULL_OUTER: testData.generateRandomSmallTableCounts(testDesc, random); testData.generateRandomSmallTableValues(testDesc, random); break; @@ -230,10 +284,15 @@ public static void generateVariationData(MapJoinTestData testData, private static RowTestObjects generateRandomSmallTableValueRow(MapJoinTestDescription testDesc, Random random) { final int columnCount = testDesc.smallTableValueTypeInfos.length; - Object[] smallTableValueRow = VectorRandomRowSource.randomWritablePrimitiveRow(columnCount, random, - testDesc.smallTableValuePrimitiveTypeInfos); + PrimitiveTypeInfo[] primitiveTypeInfos = new PrimitiveTypeInfo[columnCount]; + for (int i = 0; i < columnCount; i++) { + primitiveTypeInfos[i] = (PrimitiveTypeInfo) testDesc.smallTableValueTypeInfos[i]; + } + Object[] smallTableValueRow = + VectorRandomRowSource.randomWritablePrimitiveRow( + columnCount, random, primitiveTypeInfos); for (int c = 0; c < smallTableValueRow.length; c++) { - smallTableValueRow[c] = ((PrimitiveObjectInspector) testDesc.smallTableObjectInspectors[c]).copyObject(smallTableValueRow[c]); + smallTableValueRow[c] = ((PrimitiveObjectInspector) testDesc.smallTableValueObjectInspectors[c]).copyObject(smallTableValueRow[c]); } return new RowTestObjects(smallTableValueRow); } @@ -241,7 +300,7 @@ private static RowTestObjects generateRandomSmallTableValueRow(MapJoinTestDescri private void generateRandomSmallTableCounts(MapJoinTestDescription testDesc, Random random) { smallTableValueCounts = new ArrayList(); for (Entry testKeyEntry : smallTableKeyHashMap.entrySet()) { - final int valueCount = 1 + random.nextInt(19); + final int valueCount = 1 + random.nextInt(3); smallTableValueCounts.add(valueCount); } } @@ -258,14 +317,26 @@ private void generateRandomSmallTableValues(MapJoinTestDescription testDesc, Ran } } - private static GenerateType[] generateTypesFromTypeInfos(TypeInfo[] typeInfos) { + private static GenerateType[] generateTypesFromTypeInfos(TypeInfo[] typeInfos, + int keyCount, boolean isOuterJoin) { final int size = typeInfos.length; GenerateType[] generateTypes = new GenerateType[size]; for (int i = 0; i < size; i++) { PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfos[i]; GenerateCategory category = - GenerateCategory.generateCategoryFromPrimitiveCategory(primitiveTypeInfo.getPrimitiveCategory()); - generateTypes[i] = new GenerateType(category); + GenerateCategory.generateCategoryFromPrimitiveCategory( + primitiveTypeInfo.getPrimitiveCategory()); + final boolean allowNulls; + if (i >= keyCount) { + + // Value columns can be NULL. + allowNulls = true; + } else { + + // Non-OUTER JOIN operators expect NULL keys to have been filtered out. 
+ allowNulls = isOuterJoin; + } + generateTypes[i] = new GenerateType(category, allowNulls); } return generateTypes; } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java index bde4424..b8a08fd 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -27,20 +28,23 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; public class MapJoinTestDescription extends DescriptionTest { + public static enum MapJoinPlanVariation { + DYNAMIC_PARTITION_HASH_JOIN, + SHARED_SMALL_TABLE + } + public static class SmallTableGenerationParameters { public static enum ValueOption { NO_RESTRICTION, ONLY_ONE, - ONLY_TWO, - AT_LEAST_TWO + NO_REGULAR_SMALL_KEYS } private ValueOption valueOption; @@ -82,70 +86,103 @@ public int getNoMatchKeyOutOfAThousand() { final VectorMapJoinVariation vectorMapJoinVariation; // Adjustable. - public String[] bigTableColumnNames; + public String[] bigTableKeyColumnNames; public TypeInfo[] bigTableTypeInfos; + public int[] bigTableKeyColumnNums; - public String[] smallTableValueColumnNames; + public TypeInfo[] smallTableValueTypeInfos; - public int[] bigTableRetainColumnNums; + public int[] smallTableRetainKeyColumnNums; - public int[] smallTableRetainValueColumnNums; public SmallTableGenerationParameters smallTableGenerationParameters; // Derived. 
- public List bigTableColumnNamesList; - public String[] bigTableKeyColumnNames; - public TypeInfo[] bigTableKeyTypeInfos; - public List smallTableValueColumnNamesList; + + public int[] bigTableColumnNums; + public String[] bigTableColumnNames; + public List bigTableColumnNameList; public ObjectInspector[] bigTableObjectInspectors; - public List bigTableObjectInspectorsList; + public List bigTableObjectInspectorList; + + public TypeInfo[] bigTableKeyTypeInfos; + + public List smallTableKeyColumnNameList; + public String[] smallTableKeyColumnNames; + public TypeInfo[] smallTableKeyTypeInfos; + public ObjectInspector[] smallTableKeyObjectInspectors; + public List smallTableKeyObjectInspectorList; + + public List smallTableValueColumnNameList; + public String[] smallTableValueColumnNames; + public ObjectInspector[] smallTableValueObjectInspectors; + public List smallTableValueObjectInspectorList; + + public int[] bigTableRetainColumnNums; + public int[] smallTableRetainValueColumnNums; + + public String[] smallTableColumnNames; + public List smallTableColumnNameList; + public TypeInfo[] smallTableTypeInfos; + public List smallTableObjectInspectorList; + public StandardStructObjectInspector bigTableStandardObjectInspector; - public PrimitiveTypeInfo[] smallTableValuePrimitiveTypeInfos; - public ObjectInspector[] smallTableObjectInspectors; - public PrimitiveCategory[] smallTablePrimitiveCategories; - public List smallTableObjectInspectorsList; public StandardStructObjectInspector smallTableStandardObjectInspector; public ObjectInspector[] inputObjectInspectors; + public String[] outputColumnNames; public TypeInfo[] outputTypeInfos; public ObjectInspector[] outputObjectInspectors; + final MapJoinPlanVariation mapJoinPlanVariation; + + public MapJoinTestDescription ( + HiveConf hiveConf, + VectorMapJoinVariation vectorMapJoinVariation, + TypeInfo[] bigTableTypeInfos, + int[] bigTableKeyColumnNums, + TypeInfo[] smallTableValueTypeInfos, + int[] smallTableRetainKeyColumnNums, + SmallTableGenerationParameters smallTableGenerationParameters, + MapJoinPlanVariation mapJoinPlanVariation) { + this( + hiveConf, + vectorMapJoinVariation, + /* bigTableColumnNames */ null, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + } + public MapJoinTestDescription ( HiveConf hiveConf, VectorMapJoinVariation vectorMapJoinVariation, - String[] bigTableColumnNames, TypeInfo[] bigTableTypeInfos, + String[] bigTableColumnNames, + TypeInfo[] bigTableTypeInfos, int[] bigTableKeyColumnNums, - String[] smallTableValueColumnNames, TypeInfo[] smallTableValueTypeInfos, - int[] bigTableRetainColumnNums, - int[] smallTableRetainKeyColumnNums, int[] smallTableRetainValueColumnNums, - SmallTableGenerationParameters smallTableGenerationParameters) { + TypeInfo[] smallTableValueTypeInfos, + int[] smallTableRetainKeyColumnNums, + SmallTableGenerationParameters smallTableGenerationParameters, + MapJoinPlanVariation mapJoinPlanVariation) { super(hiveConf); + this.vectorMapJoinVariation = vectorMapJoinVariation; this.bigTableColumnNames = bigTableColumnNames; this.bigTableTypeInfos = bigTableTypeInfos; this.bigTableKeyColumnNums = bigTableKeyColumnNums; - this.smallTableValueColumnNames = smallTableValueColumnNames; + this.smallTableValueTypeInfos = smallTableValueTypeInfos; - this.bigTableRetainColumnNums = bigTableRetainColumnNums; - this.smallTableRetainKeyColumnNums = smallTableRetainKeyColumnNums; - 
this.smallTableRetainValueColumnNums = smallTableRetainValueColumnNums;
+
+ this.smallTableRetainKeyColumnNums = smallTableRetainKeyColumnNums;
 this.smallTableGenerationParameters = smallTableGenerationParameters;
- switch (vectorMapJoinVariation) {
- case INNER_BIG_ONLY:
- case LEFT_SEMI:
- trimAwaySmallTableValueInfo();
- break;
- case INNER:
- case OUTER:
- break;
- default:
- throw new RuntimeException("Unknown operator variation " + vectorMapJoinVariation);
- }
+ this.mapJoinPlanVariation = mapJoinPlanVariation;
 computeDerived();
 }
@@ -155,45 +192,121 @@
 public SmallTableGenerationParameters getSmallTableGenerationParameters() {
 }
 public void computeDerived() {
- bigTableColumnNamesList = Arrays.asList(bigTableColumnNames);
- bigTableKeyColumnNames = new String[bigTableKeyColumnNums.length];
- bigTableKeyTypeInfos = new TypeInfo[bigTableKeyColumnNums.length];
- for (int i = 0; i < bigTableKeyColumnNums.length; i++) {
- bigTableKeyColumnNames[i] = bigTableColumnNames[bigTableKeyColumnNums[i]];
- bigTableKeyTypeInfos[i] = bigTableTypeInfos[bigTableKeyColumnNums[i]];
+ final int bigTableSize = bigTableTypeInfos.length;
+
+ if (bigTableKeyColumnNames == null) {
+
+ // Automatically populate.
+ bigTableColumnNames = new String[bigTableSize];
+ for (int i = 0; i < bigTableSize; i++) {
+ bigTableColumnNames[i] = "_col" + i;
+ }
 }
- smallTableValueColumnNamesList = Arrays.asList(smallTableValueColumnNames);
+ // Automatically populate.
+ bigTableColumnNums = new int[bigTableSize];
- bigTableObjectInspectors = new ObjectInspector[bigTableTypeInfos.length];
- for (int i = 0; i < bigTableTypeInfos.length; i++) {
+ for (int i = 0; i < bigTableSize; i++) {
+ bigTableColumnNums[i] = i;
+ }
+
+ // Automatically populate.
+ bigTableRetainColumnNums = new int[bigTableSize];
+ for (int i = 0; i < bigTableSize; i++) {
+ bigTableRetainColumnNums[i] = i;
+ }
+
+ /*
+ * Big Table key information.
+ */
+ final int keySize = bigTableKeyColumnNums.length;
+
+ bigTableKeyColumnNames = new String[keySize];
+ bigTableKeyTypeInfos = new TypeInfo[keySize];
+ for (int i = 0; i < keySize; i++) {
+ final int bigTableKeyColumnNum = bigTableKeyColumnNums[i];
+ bigTableKeyColumnNames[i] = bigTableColumnNames[bigTableKeyColumnNum];
+ bigTableKeyTypeInfos[i] = bigTableTypeInfos[bigTableKeyColumnNum];
+ }
+
+ /*
+ * Big Table object inspectors.
+ */
+ bigTableObjectInspectors = new ObjectInspector[bigTableSize];
+ for (int i = 0; i < bigTableSize; i++) {
 bigTableObjectInspectors[i] =
- PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector((PrimitiveTypeInfo) bigTableTypeInfos[i]);
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
+ (PrimitiveTypeInfo) bigTableTypeInfos[i]);
+ }
+ bigTableColumnNameList = Arrays.asList(bigTableColumnNames);
+ bigTableObjectInspectorList = Arrays.asList(bigTableObjectInspectors);
+
+ /*
+ * Small Table key object inspectors are derived directly from the Big Table key information.
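+ * (They are reused object-for-object from the Big Table key columns, keeping the key
+ * comparison between the two tables type-exact.)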
+ */ + smallTableKeyColumnNames = new String[keySize]; + smallTableKeyTypeInfos = Arrays.copyOf(bigTableKeyTypeInfos, keySize); + smallTableKeyObjectInspectors = new ObjectInspector[keySize]; + for (int i = 0; i < keySize; i++) { + smallTableKeyColumnNames[i] = "_col" + i; + final int bigTableKeyColumnNum = bigTableKeyColumnNums[i]; + smallTableKeyObjectInspectors[i] = bigTableObjectInspectors[bigTableKeyColumnNum]; + } + smallTableKeyColumnNameList = Arrays.asList(smallTableKeyColumnNames); + smallTableKeyObjectInspectorList = Arrays.asList(smallTableKeyObjectInspectors); + + // First part of Small Table information is the key information. + smallTableColumnNameList = new ArrayList(smallTableKeyColumnNameList); + List smallTableTypeInfoList = + new ArrayList(Arrays.asList(smallTableKeyTypeInfos)); + smallTableObjectInspectorList = new ArrayList(); + smallTableObjectInspectorList.addAll(smallTableKeyObjectInspectorList); + + final int valueSize = smallTableValueTypeInfos.length; + + // Automatically populate. + smallTableValueColumnNames = new String[valueSize]; + for (int i = 0; i < valueSize; i++) { + smallTableValueColumnNames[i] = "_col" + (keySize + i); } - bigTableObjectInspectorsList = Arrays.asList(bigTableObjectInspectors); - smallTableObjectInspectors = new ObjectInspector[smallTableValueTypeInfos.length]; - smallTablePrimitiveCategories = new PrimitiveCategory[smallTableValueTypeInfos.length]; - smallTableValuePrimitiveTypeInfos = new PrimitiveTypeInfo[smallTableValueTypeInfos.length]; - for (int i = 0; i < smallTableValueTypeInfos.length; i++) { - PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) smallTableValueTypeInfos[i]; - smallTableObjectInspectors[i] = - PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo); - smallTablePrimitiveCategories[i] = primitiveTypeInfo.getPrimitiveCategory(); - smallTableValuePrimitiveTypeInfos[i] = primitiveTypeInfo; + smallTableValueObjectInspectors = new ObjectInspector[valueSize]; + for (int i = 0; i < valueSize; i++) { + smallTableValueObjectInspectors[i] = + PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + (PrimitiveTypeInfo) smallTableValueTypeInfos[i]); } - smallTableObjectInspectorsList = Arrays.asList(smallTableObjectInspectors); + smallTableValueColumnNameList = Arrays.asList(smallTableValueColumnNames); + smallTableTypeInfoList.addAll(Arrays.asList(smallTableValueTypeInfos)); + smallTableValueObjectInspectorList = Arrays.asList(smallTableValueObjectInspectors); + smallTableColumnNameList.addAll(smallTableValueColumnNameList); + smallTableColumnNames = smallTableColumnNameList.toArray(new String[0]); + smallTableTypeInfos = smallTableTypeInfoList.toArray(new TypeInfo[0]); + + smallTableObjectInspectorList.addAll(smallTableValueObjectInspectorList); + + /* + * The inputObjectInspectors describe the keys and values of the Big Table and Small Table. 
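+ * (For the Small Table this now means the key columns come first, followed by the
+ * value columns, matching smallTableColumnNameList assembled above.)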
+ */ bigTableStandardObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector( - bigTableColumnNamesList, Arrays.asList((ObjectInspector[]) bigTableObjectInspectors)); + bigTableColumnNameList, bigTableObjectInspectorList); smallTableStandardObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector( - smallTableValueColumnNamesList, Arrays.asList((ObjectInspector[]) smallTableObjectInspectors)); + smallTableColumnNameList, smallTableObjectInspectorList); inputObjectInspectors = - new ObjectInspector[] { bigTableStandardObjectInspector, smallTableStandardObjectInspector }; + new ObjectInspector[] { + bigTableStandardObjectInspector, smallTableStandardObjectInspector }; + + // For now, we always retain the Small Table values... + // Automatically populate. + smallTableRetainValueColumnNums = new int[valueSize]; + for (int i = 0; i < valueSize; i++) { + smallTableRetainValueColumnNums[i] = i; + } int outputLength = bigTableRetainColumnNums.length + @@ -203,12 +316,13 @@ public void computeDerived() { outputTypeInfos = new TypeInfo[outputLength]; int outputIndex = 0; - for (int i = 0; i < bigTableRetainColumnNums.length; i++) { + final int bigTableRetainSize = bigTableRetainColumnNums.length; + for (int i = 0; i < bigTableRetainSize; i++) { outputTypeInfos[outputIndex++] = bigTableTypeInfos[bigTableRetainColumnNums[i]]; } - // for (int i = 0; i < smallTableRetainKeyColumnNums.length; i++) { - // outputTypeInfos[outputIndex++] = smallTableTypeInfos[smallTableRetainKeyColumnNums[i]]; - // } + for (int i = 0; i < smallTableRetainKeyColumnNums.length; i++) { + outputTypeInfos[outputIndex++] = smallTableKeyTypeInfos[smallTableRetainKeyColumnNums[i]]; + } for (int i = 0; i < smallTableRetainValueColumnNums.length; i++) { outputTypeInfos[outputIndex++] = smallTableValueTypeInfos[smallTableRetainValueColumnNums[i]]; } @@ -221,13 +335,6 @@ public void computeDerived() { } } - public void trimAwaySmallTableValueInfo() { - smallTableValueColumnNames = new String[] {}; - smallTableValueTypeInfos = new TypeInfo[] {}; - smallTableRetainKeyColumnNums = new int[] {}; - smallTableRetainValueColumnNums = new int[] {}; - } - private String[] createOutputColumnNames(int outputColumnCount) { String[] outputColumnNames = new String[outputColumnCount]; int counter = 1; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/NoOpExpression.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/NoOpExpression.java new file mode 100644 index 0000000..fdd0342 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/NoOpExpression.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+
+/**
+ * A test expression that deliberately does nothing: evaluate() leaves the batch untouched.
+ */
+public class NoOpExpression extends VectorExpression {
+
+ private static final long serialVersionUID = 1L;
+
+ public NoOpExpression() {
+ }
+
+ public NoOpExpression(int colNum) {
+ super(colNum);
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+ }
+
+ @Override
+ public String vectorExpressionParameters() {
+ return "noOpCol" + outputColumnNum + ":" +
+ getTypeName(outputTypeInfo, outputDataTypePhysicalVariation);
+ }
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ return (new VectorExpressionDescriptor.Builder()).build();
+ }
+}
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java
index 4c41f9c..4d25589 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java
@@ -50,6 +50,7 @@
 import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
@@ -57,8 +58,13 @@
 import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator;
 import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType;
 import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.CreateMapJoinResult;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.TestMultiSetCollectorOperator;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.TestMultiSetVectorCollectorOperator;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.MapJoinPlanVariation;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters.ValueOption;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastMultiKeyHashMap;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow;
@@ -69,7 +75,6 @@
 import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
 import org.apache.hadoop.hive.ql.plan.JoinDesc;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
-import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc;
@@ -86,14 +91,13 @@ import
org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.io.BytesWritable; @@ -101,6 +105,7 @@ import org.apache.hive.common.util.HashCodeUtil; import org.apache.hive.common.util.ReflectionUtil; import org.junit.Test; +import org.junit.Ignore; import java.io.IOException; import java.util.ArrayList; @@ -120,233 +125,1350 @@ public class TestMapJoinOperator { - /* - * This test collector operator is for MapJoin row-mode. - */ - private class TestMultiSetCollectorOperator extends RowCollectorTestOperator { + private boolean addLongHiveConfVariation(int hiveConfVariation, HiveConf hiveConf) { + + // Set defaults. + HiveConf.setBoolVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MINMAX_ENABLED, false); + HiveConf.setIntVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_OVERFLOW_REPEATED_THRESHOLD, -1); + + switch (hiveConfVariation) { + case 0: + break; + case 1: + HiveConf.setBoolVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MINMAX_ENABLED, true); + break; + case 2: + // Force generateHashMapResultLargeMultiValue to be used. 
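+ // (With the repeated-key threshold at 5, keys that repeat more than a handful of
+ // times should spill into the large multi-value hash map result path.)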
+ HiveConf.setIntVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_OVERFLOW_REPEATED_THRESHOLD, 5); + break; + default: + return false; + } + return true; + } + + private boolean goodTestVariation(MapJoinTestDescription testDesc) { + final int smallTableValueSize = testDesc.smallTableRetainValueColumnNums.length; + + switch (testDesc.vectorMapJoinVariation) { + case INNER: + return (smallTableValueSize > 0); + case INNER_BIG_ONLY: + case LEFT_SEMI: + return (smallTableValueSize == 0); + case OUTER: + return true; + case FULL_OUTER: + return true; + default: + throw new RuntimeException( + "Unexpected vectorMapJoinVariation " + testDesc.vectorMapJoinVariation); + } + + } + + @Test + @Ignore + public void testLong0() throws Exception { + long seed = 234882L; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong0( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { + doTestLong0( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + private boolean doTestLong0(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: long key, no value; Small Table: no key retained, date value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.longTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {}; + + smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.dateTypeInfo}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testLong0"); + + return false; + } + + @Test + @Ignore + public void testLong0_NoRegularKeys() throws Exception { + long seed = 234882L; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong0_NoRegularKeys( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { + doTestLong0_NoRegularKeys( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + 
hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + private boolean doTestLong0_NoRegularKeys(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + smallTableGenerationParameters.setValueOption(ValueOption.NO_REGULAR_SMALL_KEYS); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: long key, no value; Small Table: no key retained, date value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.longTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {}; + + smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.dateTypeInfo}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "doTestLong0_NoRegularKeys"); + + return false; + } + + @Test + @Ignore + public void testLong1() throws Exception { + long seed = 234882L; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong1( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestLong1( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong1(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: int key, long value; Small Table: no key retained, string value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.intTypeInfo, + TypeInfoFactory.longTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {}; + + smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.stringTypeInfo}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + 
smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testLong1"); + + return false; + } + + @Test + public void testLong2() throws Exception { + long seed = 3553; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong2( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestLong2( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong2(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: short key, no value; Small Table: key retained, timestamp value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.shortTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.timestampTypeInfo}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testLong2"); + + return false; + } + + + @Test + public void testLong3() throws Exception { + long seed = 9934; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong3( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestLong3( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong3(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + 
+ TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: int key, string value; Small Table: key retained, decimal value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.intTypeInfo, + TypeInfoFactory.stringTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = + new TypeInfo[] { + new DecimalTypeInfo(38, 18)}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testLong3"); + + return false; + } + + @Test + public void testLong3_NoRegularKeys() throws Exception { + long seed = 9934; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong3_NoRegularKeys( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestLong3_NoRegularKeys( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong3_NoRegularKeys(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + smallTableGenerationParameters.setValueOption(ValueOption.NO_REGULAR_SMALL_KEYS); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: int key, string value; Small Table: key retained, decimal value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.intTypeInfo, + TypeInfoFactory.stringTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = + new TypeInfo[] { + new DecimalTypeInfo(38, 18)}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "doTestLong3_NoRegularKeys"); + + return false; + } + + @Test + public void testLong4() throws Exception { + long seed = 3982; + int rowCount = 10; + + int 
hiveConfVariation = 0;
+ boolean hiveConfVariationsDone = false;
+ do {
+ for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) {
+ hiveConfVariationsDone =
+ doTestLong4(
+ seed, rowCount, hiveConfVariation, vectorMapJoinVariation,
+ MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN);
+ if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) {
+ doTestLong4(
+ seed, rowCount, hiveConfVariation, vectorMapJoinVariation,
+ MapJoinPlanVariation.SHARED_SMALL_TABLE);
+ }
+ }
+ seed++;
+ hiveConfVariation++;
+ } while (!hiveConfVariationsDone);
+ }
+
+ public boolean doTestLong4(long seed, int rowCount, int hiveConfVariation,
+ VectorMapJoinVariation vectorMapJoinVariation,
+ MapJoinPlanVariation mapJoinPlanVariation) throws Exception {
+
+ HiveConf hiveConf = new HiveConf();
+
+ if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) {
+ return true;
+ }
+
+ TypeInfo[] bigTableTypeInfos = null;
+
+ int[] bigTableKeyColumnNums = null;
+
+ TypeInfo[] smallTableValueTypeInfos = null;
+
+ int[] smallTableRetainKeyColumnNums = null;
+
+ SmallTableGenerationParameters smallTableGenerationParameters =
+ new SmallTableGenerationParameters();
+
+ MapJoinTestDescription testDesc = null;
+ MapJoinTestData testData = null;
+
+ // Big Table: int key, no value; Small Table: no key retained, no value
+ // (exercise INNER_BIG_ONLY, LEFT_SEMI)
+ bigTableTypeInfos =
+ new TypeInfo[] {
+ TypeInfoFactory.intTypeInfo};
+
+ bigTableKeyColumnNums = new int[] {0};
+
+ smallTableRetainKeyColumnNums = new int[] {};
+
+ smallTableValueTypeInfos = new TypeInfo[] {};
+
+ testDesc =
+ new MapJoinTestDescription(
+ hiveConf, vectorMapJoinVariation,
+ bigTableTypeInfos,
+ bigTableKeyColumnNums,
+ smallTableValueTypeInfos,
+ smallTableRetainKeyColumnNums,
+ smallTableGenerationParameters,
+ mapJoinPlanVariation);
+
+ if (!goodTestVariation(testDesc)) {
+ return false;
+ }
+
+ testData =
+ new MapJoinTestData(rowCount, testDesc, seed, seed * 10);
+
+ executeTest(testDesc, testData, "testLong4");
+
+ return false;
+ }
+
+ @Test
+ public void testLong5() throws Exception {
+ long seed = 3553;
+ int rowCount = 10;
+
+ int hiveConfVariation = 0;
+ boolean hiveConfVariationsDone = false;
+ do {
+ for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) {
+ hiveConfVariationsDone =
+ doTestLong5(
+ seed, rowCount, hiveConfVariation, vectorMapJoinVariation,
+ MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN);
+ if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) {
+ doTestLong5(
+ seed, rowCount, hiveConfVariation, vectorMapJoinVariation,
+ MapJoinPlanVariation.SHARED_SMALL_TABLE);
+ }
+ }
+ seed++;
+ hiveConfVariation++;
+ } while (!hiveConfVariationsDone);
+ }
+
+ public boolean doTestLong5(long seed, int rowCount, int hiveConfVariation,
+ VectorMapJoinVariation vectorMapJoinVariation,
+ MapJoinPlanVariation mapJoinPlanVariation) throws Exception {
+
+ HiveConf hiveConf = new HiveConf();
+
+ if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) {
+ return true;
+ }
+
+ TypeInfo[] bigTableTypeInfos = null;
+
+ int[] bigTableKeyColumnNums = null;
+
+ TypeInfo[] smallTableValueTypeInfos = null;
+
+ int[] smallTableRetainKeyColumnNums = null;
+
+ SmallTableGenerationParameters smallTableGenerationParameters =
+ new SmallTableGenerationParameters();
+
+ MapJoinTestDescription testDesc = null;
+ MapJoinTestData testData = null;
+
+ // Big Table: long key, no value; Small Table: key retained, no value
+ // (exercise INNER_BIG_ONLY, LEFT_SEMI)
+ bigTableTypeInfos =
+ new TypeInfo[] {
+ TypeInfoFactory.longTypeInfo};
+
+ bigTableKeyColumnNums = new int[] {0};
+
+ smallTableRetainKeyColumnNums = new int[] {0};
+
+ smallTableValueTypeInfos = new TypeInfo[] {};
+
+ testDesc =
+ new MapJoinTestDescription(
+ hiveConf, vectorMapJoinVariation,
+ bigTableTypeInfos,
+ bigTableKeyColumnNums,
+ smallTableValueTypeInfos,
+ smallTableRetainKeyColumnNums,
+ smallTableGenerationParameters,
+ mapJoinPlanVariation);
+
+ if (!goodTestVariation(testDesc)) {
+ return false;
+ }
+
+ testData =
+ new MapJoinTestData(rowCount, testDesc, seed, seed * 10);
+
+ executeTest(testDesc, testData, "testLong5");
+
+ return false;
+ }
+
+ @Test
+ public void testLong6() throws Exception {
+ long seed = 9384;
+ int rowCount = 10;
+
+ int hiveConfVariation = 0;
+ boolean hiveConfVariationsDone = false;
+ do {
+ for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) {
+ hiveConfVariationsDone =
+ doTestLong6(
+ seed, rowCount, hiveConfVariation, vectorMapJoinVariation,
+ MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN);
+ if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) {
+ doTestLong6(
+ seed, rowCount, hiveConfVariation, vectorMapJoinVariation,
+ MapJoinPlanVariation.SHARED_SMALL_TABLE);
+ }
+ }
+ seed++;
+ hiveConfVariation++;
+ } while (!hiveConfVariationsDone);
+ }
+
+ public boolean doTestLong6(long seed, int rowCount, int hiveConfVariation,
+ VectorMapJoinVariation vectorMapJoinVariation,
+ MapJoinPlanVariation mapJoinPlanVariation) throws Exception {
+
+ HiveConf hiveConf = new HiveConf();
+
+ if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) {
+ return true;
+ }
+
+ TypeInfo[] bigTableTypeInfos = null;
+
+ int[] bigTableKeyColumnNums = null;
+
+ TypeInfo[] smallTableValueTypeInfos = null;
+
+ int[] smallTableRetainKeyColumnNums = null;
+
+ SmallTableGenerationParameters smallTableGenerationParameters =
+ new SmallTableGenerationParameters();
+
+ MapJoinTestDescription testDesc = null;
+ MapJoinTestData testData = null;
+
+ // Big Table: long key, timestamp value; Small Table: key retained, no value
+ // (exercise INNER_BIG_ONLY, LEFT_SEMI)
+ bigTableTypeInfos =
+ new TypeInfo[] {
+ TypeInfoFactory.longTypeInfo,
+ TypeInfoFactory.timestampTypeInfo};
+
+ bigTableKeyColumnNums = new int[] {0};
+
+ smallTableRetainKeyColumnNums = new int[] {0};
+
+ smallTableValueTypeInfos = new TypeInfo[] {};
+
+ testDesc =
+ new MapJoinTestDescription(
+ hiveConf, vectorMapJoinVariation,
+ bigTableTypeInfos,
+ bigTableKeyColumnNums,
+ smallTableValueTypeInfos,
+ smallTableRetainKeyColumnNums,
+ smallTableGenerationParameters,
+ mapJoinPlanVariation);
+
+ if (!goodTestVariation(testDesc)) {
+ return false;
+ }
+
+ testData =
+ new MapJoinTestData(rowCount, testDesc, seed, seed * 10);
+
+ executeTest(testDesc, testData, "testLong6");
+
+ return false;
+ }
+
+ private boolean addNonLongHiveConfVariation(int hiveConfVariation, HiveConf hiveConf) {
+
+ // Set defaults.
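+ // (Same pattern as addLongHiveConfVariation above, minus the native min/max variation,
+ // which presumably applies only to long-key hash tables.)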
+ HiveConf.setIntVar(
+ hiveConf,
+ HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_OVERFLOW_REPEATED_THRESHOLD, -1);
+
+ switch (hiveConfVariation) {
+ case 0:
+ break;
+ case 1:
+ // Force generateHashMapResultLargeMultiValue to be used.
+ HiveConf.setIntVar(
+ hiveConf,
+ HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_OVERFLOW_REPEATED_THRESHOLD, 5);
+ break;
+ default:
+ return false;
+ }
+ return true;
+ }
+
+ @Test
+ public void testMultiKey0() throws Exception {
+ long seed = 28322;
+
+ int hiveConfVariation = 0;
+ boolean hiveConfVariationsDone = false;
+ do {
+ for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) {
+ hiveConfVariationsDone =
+ doTestMultiKey0(
+ seed, hiveConfVariation, vectorMapJoinVariation,
+ MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN);
+ if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) {
+ doTestMultiKey0(
+ seed, hiveConfVariation, vectorMapJoinVariation,
+ MapJoinPlanVariation.SHARED_SMALL_TABLE);
+ }
+ }
+ seed++;
+ hiveConfVariation++;
+ } while (!hiveConfVariationsDone);
+ }
+
+ public boolean doTestMultiKey0(long seed, int hiveConfVariation, VectorMapJoinVariation vectorMapJoinVariation,
+ MapJoinPlanVariation mapJoinPlanVariation) throws Exception {
+
+ int rowCount = 10;
+
+ HiveConf hiveConf = new HiveConf();
+
+ if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) {
+ return true;
+ }
+
+ TypeInfo[] bigTableTypeInfos = null;
+
+ int[] bigTableKeyColumnNums = null;
+
+ TypeInfo[] smallTableValueTypeInfos = null;
+
+ int[] smallTableRetainKeyColumnNums = null;
+
+ SmallTableGenerationParameters smallTableGenerationParameters =
+ new SmallTableGenerationParameters();
+
+ MapJoinTestDescription testDesc = null;
+ MapJoinTestData testData = null;
- private final RowTestObjectsMultiSet testRowMultiSet;
+ // Two key columns.
+ bigTableTypeInfos =
+ new TypeInfo[] {
+ TypeInfoFactory.shortTypeInfo,
+ TypeInfoFactory.intTypeInfo};
+ bigTableKeyColumnNums = new int[] {0, 1};
- public TestMultiSetCollectorOperator(
- ObjectInspector[] outputObjectInspectors,
- RowTestObjectsMultiSet testRowMultiSet) {
- super(outputObjectInspectors);
- this.testRowMultiSet = testRowMultiSet;
- }
+ smallTableRetainKeyColumnNums = new int[] {0, 1};
- public RowTestObjectsMultiSet getTestRowMultiSet() {
- return testRowMultiSet;
- }
+ smallTableValueTypeInfos = new TypeInfo[] {};
- public void nextTestRow(RowTestObjects testRow) {
- testRowMultiSet.add(testRow);
- }
+ //----------------------------------------------------------------------------------------------
- @Override
- public String getName() {
- return TestMultiSetCollectorOperator.class.getSimpleName();
+ testDesc =
+ new MapJoinTestDescription(
+ hiveConf, vectorMapJoinVariation,
+ bigTableTypeInfos,
+ bigTableKeyColumnNums,
+ smallTableValueTypeInfos,
+ smallTableRetainKeyColumnNums,
+ smallTableGenerationParameters,
+ mapJoinPlanVariation);
+
+ if (!goodTestVariation(testDesc)) {
+ return false;
 }
+
+ // Prepare data. Good for ANY implementation variation.
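+ // (The same seed-driven MapJoinTestData is reused across the implementation variations
+ // so that executeTest can compare their outputs as row multisets.)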
+ testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testMultiKey0"); + + return false; + } + + @Test + public void testMultiKey1() throws Exception { + long seed = 87543; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestMultiKey1( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestMultiKey1( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); } - private class TestMultiSetVectorCollectorOperator extends RowVectorCollectorTestOperator { + public boolean doTestMultiKey1(long seed, int hiveConfVariation, VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { - private final RowTestObjectsMultiSet testRowMultiSet; + int rowCount = 10; + + HiveConf hiveConf = new HiveConf(); - public RowTestObjectsMultiSet getTestRowMultiSet() { - return testRowMultiSet; + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; } - public TestMultiSetVectorCollectorOperator(TypeInfo[] outputTypeInfos, - ObjectInspector[] outputObjectInspectors, RowTestObjectsMultiSet testRowMultiSet) - throws HiveException { - super(outputTypeInfos, outputObjectInspectors); - this.testRowMultiSet = testRowMultiSet; - } + TypeInfo[] bigTableTypeInfos = null; - public void nextTestRow(RowTestObjects testRow) { - testRowMultiSet.add(testRow); - } + int[] bigTableKeyColumnNums = null; - @Override - public String getName() { - return TestMultiSetVectorCollectorOperator.class.getSimpleName(); - } - } + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Three key columns. + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.timestampTypeInfo, + TypeInfoFactory.shortTypeInfo, + TypeInfoFactory.stringTypeInfo}; + bigTableKeyColumnNums = new int[] {0, 1, 2}; + + smallTableRetainKeyColumnNums = new int[] {0, 1, 2}; - private static class KeyConfig { - long seed; - PrimitiveTypeInfo primitiveTypeInfo; - KeyConfig(long seed, PrimitiveTypeInfo primitiveTypeInfo) { - this.seed = seed; - this.primitiveTypeInfo = primitiveTypeInfo; + smallTableValueTypeInfos = + new TypeInfo[] {new DecimalTypeInfo(38, 18)}; + + //---------------------------------------------------------------------------------------------- + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; } + + // Prepare data. Good for ANY implementation variation. 
+ testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testMultiKey1"); + + return false; } - private static KeyConfig[] longKeyConfigs = new KeyConfig[] { - new KeyConfig(234882L, TypeInfoFactory.longTypeInfo), - new KeyConfig(4600L, TypeInfoFactory.intTypeInfo), - new KeyConfig(98743L, TypeInfoFactory.shortTypeInfo)}; @Test - public void testLong() throws Exception { - for (KeyConfig longKeyConfig : longKeyConfigs) { + public void testMultiKey2() throws Exception { + long seed = 87543; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { - if (vectorMapJoinVariation == VectorMapJoinVariation.NONE){ - continue; + hiveConfVariationsDone = + doTestMultiKey2( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestMultiKey2( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); } - doTestLong(longKeyConfig.seed, longKeyConfig.primitiveTypeInfo, vectorMapJoinVariation); } - } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); } - public void doTestLong(long seed, TypeInfo numberTypeInfo, - VectorMapJoinVariation vectorMapJoinVariation) throws Exception { + public boolean doTestMultiKey2(long seed, int hiveConfVariation, VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { - int rowCount = 10000; + int rowCount = 10; HiveConf hiveConf = new HiveConf(); - String[] bigTableColumnNames = new String[] {"number1"}; - TypeInfo[] bigTableTypeInfos = - new TypeInfo[] { - TypeInfoFactory.longTypeInfo}; - int[] bigTableKeyColumnNums = new int[] {0}; + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } - String[] smallTableValueColumnNames = new String[] {"sv1", "sv2"}; - TypeInfo[] smallTableValueTypeInfos = - new TypeInfo[] {TypeInfoFactory.dateTypeInfo, TypeInfoFactory.stringTypeInfo}; + TypeInfo[] bigTableTypeInfos = null; - int[] bigTableRetainColumnNums = new int[] {0}; + int[] bigTableKeyColumnNums = null; - int[] smallTableRetainKeyColumnNums = new int[] {}; - int[] smallTableRetainValueColumnNums = new int[] {0, 1}; + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; SmallTableGenerationParameters smallTableGenerationParameters = new SmallTableGenerationParameters(); + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Three key columns. 
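+    // (Multi-column keys exercise the serialized multi-key hash table path, in contrast
+    // to the single-long-key tables covered by the testLong* cases above.)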
+ bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.longTypeInfo, + TypeInfoFactory.shortTypeInfo, + TypeInfoFactory.stringTypeInfo}; + bigTableKeyColumnNums = new int[] {0, 1, 2}; + + smallTableRetainKeyColumnNums = new int[] {0, 1, 2}; + + smallTableValueTypeInfos = + new TypeInfo[] { + TypeInfoFactory.stringTypeInfo}; + //---------------------------------------------------------------------------------------------- - MapJoinTestDescription testDesc = new MapJoinTestDescription( - hiveConf, vectorMapJoinVariation, - bigTableColumnNames, bigTableTypeInfos, - bigTableKeyColumnNums, - smallTableValueColumnNames, smallTableValueTypeInfos, - bigTableRetainColumnNums, - smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, - smallTableGenerationParameters); + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } // Prepare data. Good for ANY implementation variation. - MapJoinTestData testData = + testData = new MapJoinTestData(rowCount, testDesc, seed, seed * 10); - executeTest(testDesc, testData); + executeTest(testDesc, testData, "testMultiKey2"); + + return false; } @Test - public void testMultiKey() throws Exception { + public void testString0() throws Exception { long seed = 87543; - for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { - if (vectorMapJoinVariation == VectorMapJoinVariation.NONE){ - continue; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestString0( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { + doTestString0( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } } - doTestMultiKey(seed, vectorMapJoinVariation); - } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); } - public void doTestMultiKey(long seed, VectorMapJoinVariation vectorMapJoinVariation) throws Exception { + public boolean doTestString0(long seed, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { - int rowCount = 10000; + int rowCount = 10; HiveConf hiveConf = new HiveConf(); - String[] bigTableColumnNames = new String[] {"b1", "b2", "b3"}; - TypeInfo[] bigTableTypeInfos = + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // One plain STRING key column.
+ bigTableTypeInfos = new TypeInfo[] { - TypeInfoFactory.intTypeInfo, - TypeInfoFactory.longTypeInfo, TypeInfoFactory.stringTypeInfo}; - int[] bigTableKeyColumnNums = new int[] {0, 1, 2}; + bigTableKeyColumnNums = new int[] {0}; - String[] smallTableValueColumnNames = new String[] {"sv1"}; - TypeInfo[] smallTableValueTypeInfos = - new TypeInfo[] {TypeInfoFactory.stringTypeInfo}; + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.dateTypeInfo, TypeInfoFactory.timestampTypeInfo}; + + //---------------------------------------------------------------------------------------------- + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + // Prepare data. Good for ANY implementation variation. + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testString0"); + + return false; + } + + @Test + public void testString1() throws Exception { + long seed = 3422; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestString1( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestString1( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestString1(long seed, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { - int[] bigTableRetainColumnNums = new int[] {0, 1, 2}; + int rowCount = 10; - int[] smallTableRetainKeyColumnNums = new int[] {}; - int[] smallTableRetainValueColumnNums = new int[] {0}; + HiveConf hiveConf = new HiveConf(); + + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; SmallTableGenerationParameters smallTableGenerationParameters = new SmallTableGenerationParameters(); + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // One BINARY key column. 
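+    // (A BINARY key, like STRING and multi-column keys, goes through the bytes-based
+    // hash table implementations rather than the long-key ones.)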
+ bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.binaryTypeInfo}; + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = + new TypeInfo[] { + TypeInfoFactory.shortTypeInfo, + TypeInfoFactory.floatTypeInfo, + new DecimalTypeInfo(38, 18)}; + + smallTableGenerationParameters = + new SmallTableGenerationParameters(); + //---------------------------------------------------------------------------------------------- - MapJoinTestDescription testDesc = new MapJoinTestDescription( - hiveConf, vectorMapJoinVariation, - bigTableColumnNames, bigTableTypeInfos, - bigTableKeyColumnNums, - smallTableValueColumnNames, smallTableValueTypeInfos, - bigTableRetainColumnNums, - smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, - smallTableGenerationParameters); + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } // Prepare data. Good for ANY implementation variation. - MapJoinTestData testData = + testData = new MapJoinTestData(rowCount, testDesc, seed, seed * 10); - executeTest(testDesc, testData); + executeTest(testDesc, testData, "testString1"); + + return false; } @Test - public void testString() throws Exception { - long seed = 87543; - for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { - if (vectorMapJoinVariation == VectorMapJoinVariation.NONE){ - continue; + public void testString2() throws Exception { + long seed = 7439; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestString2( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestString2( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } } - doTestString(seed, vectorMapJoinVariation); - } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); } - public void doTestString(long seed, VectorMapJoinVariation vectorMapJoinVariation) throws Exception { + public boolean doTestString2(long seed, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { - int rowCount = 10000; + int rowCount = 10; HiveConf hiveConf = new HiveConf(); - String[] bigTableColumnNames = new String[] {"b1"}; - TypeInfo[] bigTableTypeInfos = - new TypeInfo[] { - TypeInfoFactory.stringTypeInfo}; - int[] bigTableKeyColumnNums = new int[] {0}; + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } - String[] smallTableValueColumnNames = new String[] {"sv1", "sv2"}; - TypeInfo[] smallTableValueTypeInfos = - new TypeInfo[] {TypeInfoFactory.dateTypeInfo, TypeInfoFactory.timestampTypeInfo}; + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; - int[] bigTableRetainColumnNums = new int[] {0}; + TypeInfo[] smallTableValueTypeInfos = null; - int[] smallTableRetainKeyColumnNums = new int[] {}; - int[] smallTableRetainValueColumnNums = new int[] {0, 1}; + int[] smallTableRetainKeyColumnNums = null; SmallTableGenerationParameters 
smallTableGenerationParameters = new SmallTableGenerationParameters(); + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // One STRING key column; Small Table value: NONE (tests INNER_BIG_ONLY, LEFT_SEMI). + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.stringTypeInfo}; + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = new TypeInfo[] {}; + + smallTableGenerationParameters = + new SmallTableGenerationParameters(); + //---------------------------------------------------------------------------------------------- - MapJoinTestDescription testDesc = new MapJoinTestDescription( - hiveConf, vectorMapJoinVariation, - bigTableColumnNames, bigTableTypeInfos, - bigTableKeyColumnNums, - smallTableValueColumnNames, smallTableValueTypeInfos, - bigTableRetainColumnNums, - smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, - smallTableGenerationParameters); + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } // Prepare data. Good for ANY implementation variation. - MapJoinTestData testData = + testData = new MapJoinTestData(rowCount, testDesc, seed, seed * 10); - executeTest(testDesc, testData); + executeTest(testDesc, testData, "testString2"); + + return false; } private void addBigTableRetained(MapJoinTestDescription testDesc, Object[] bigTableRowObjects, @@ -357,14 +1479,32 @@ private void addBigTableRetained(MapJoinTestDescription testDesc, Object[] bigTa } } - private void addToOutput(MapJoinTestDescription testDesc, RowTestObjectsMultiSet expectedTestRowMultiSet, - Object[] outputObjects) { + private void addToOutput(MapJoinTestDescription testDesc, + RowTestObjectsMultiSet expectedTestRowMultiSet, Object[] outputObjects, + RowTestObjectsMultiSet.RowFlag rowFlag) { for (int c = 0; c < outputObjects.length; c++) { - PrimitiveObjectInspector primitiveObjInsp = ((PrimitiveObjectInspector) testDesc.outputObjectInspectors[c]); + PrimitiveObjectInspector primitiveObjInsp = + ((PrimitiveObjectInspector) testDesc.outputObjectInspectors[c]); Object outputObject = outputObjects[c]; outputObjects[c] = primitiveObjInsp.copyObject(outputObject); } - expectedTestRowMultiSet.add(new RowTestObjects(outputObjects)); + expectedTestRowMultiSet.add(new RowTestObjects(outputObjects), rowFlag); + } + + private String rowToCsvString(Object[] rowObjects) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < rowObjects.length; i++) { + if (sb.length() > 0) { + sb.append(","); + } + Object obj = rowObjects[i]; + if (obj == null) { + sb.append("\\N"); + } else { + sb.append(obj); + } + } + return sb.toString(); } /* @@ -377,7 +1517,7 @@ private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescript RowTestObjectsMultiSet expectedTestRowMultiSet = new RowTestObjectsMultiSet(); VectorExtractRow vectorExtractRow = new VectorExtractRow(); - vectorExtractRow.init(testDesc.bigTableKeyTypeInfos); + vectorExtractRow.init(testDesc.bigTableTypeInfos); final int bigTableColumnCount = testDesc.bigTableTypeInfos.length; Object[] bigTableRowObjects = new Object[bigTableColumnCount]; @@ -397,20 +1537,26 @@ private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescript vectorExtractRow.extractRow(testData.bigTableBatch, 
r, bigTableRowObjects); // Form key object array + boolean hasAnyNulls = false; // NULLs may be present in {FULL|LEFT|RIGHT} OUTER joins. for (int k = 0; k < bigTableKeyColumnCount; k++) { int keyColumnNum = testDesc.bigTableKeyColumnNums[k]; - bigTableKeyObjects[k] = bigTableRowObjects[keyColumnNum]; + Object keyObject = bigTableRowObjects[keyColumnNum]; + if (keyObject == null) { + hasAnyNulls = true; + } + bigTableKeyObjects[k] = keyObject; bigTableKeyObjects[k] = ((PrimitiveObjectInspector) testDesc.bigTableObjectInspectors[keyColumnNum]).copyObject(bigTableKeyObjects[k]); } RowTestObjects testKey = new RowTestObjects(bigTableKeyObjects); - if (testData.smallTableKeyHashMap.containsKey(testKey)) { + if (testData.smallTableKeyHashMap.containsKey(testKey) && !hasAnyNulls) { int smallTableKeyIndex = testData.smallTableKeyHashMap.get(testKey); switch (testDesc.vectorMapJoinVariation) { case INNER: case OUTER: + case FULL_OUTER: { // One row per value. ArrayList valueList = testData.smallTableValues.get(smallTableKeyIndex); @@ -420,36 +1566,46 @@ private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescript addBigTableRetained(testDesc, bigTableRowObjects, outputObjects); + int outputColumnNum = testDesc.bigTableRetainColumnNums.length; + + final int smallTableRetainKeyColumnNumsLength = + testDesc.smallTableRetainKeyColumnNums.length; + for (int o = 0; o < smallTableRetainKeyColumnNumsLength; o++) { + outputObjects[outputColumnNum++] = + bigTableKeyObjects[testDesc.smallTableRetainKeyColumnNums[o]]; + } + Object[] valueRow = valueList.get(v).getRow(); - final int bigTableRetainColumnNumsLength = testDesc.bigTableRetainColumnNums.length; - final int smallTableRetainValueColumnNumsLength = testDesc.smallTableRetainValueColumnNums.length; + final int smallTableRetainValueColumnNumsLength = + testDesc.smallTableRetainValueColumnNums.length; for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) { - outputObjects[bigTableRetainColumnNumsLength + o] = valueRow[testDesc.smallTableRetainValueColumnNums[o]]; + outputObjects[outputColumnNum++] = + valueRow[testDesc.smallTableRetainValueColumnNums[o]]; } - addToOutput(testDesc, expectedTestRowMultiSet, outputObjects); + addToOutput(testDesc, expectedTestRowMultiSet, outputObjects, + RowTestObjectsMultiSet.RowFlag.REGULAR); } } break; case INNER_BIG_ONLY: - { - // Value count rows. - final int valueCount = testData.smallTableValueCounts.get(smallTableKeyIndex); - for (int v = 0; v < valueCount; v++) { - Object[] outputObjects = new Object[testDesc.outputColumnNames.length]; - - addBigTableRetained(testDesc, bigTableRowObjects, outputObjects); - addToOutput(testDesc, expectedTestRowMultiSet, outputObjects); - } - } - break; case LEFT_SEMI: { - // One row (existence). 
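+            // INNER_BIG_ONLY now falls through to the LEFT_SEMI expectation below: one
+            // output row per matching Big Table row, plus any retained Small Table key columns.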
Object[] outputObjects = new Object[testDesc.outputColumnNames.length]; addBigTableRetained(testDesc, bigTableRowObjects, outputObjects); - addToOutput(testDesc, expectedTestRowMultiSet, outputObjects); + + int outputColumnNum = testDesc.bigTableRetainColumnNums.length; + + final int smallTableRetainKeyColumnNumsLength = + testDesc.smallTableRetainKeyColumnNums.length; + for (int o = 0; o < smallTableRetainKeyColumnNumsLength; o++) { + outputObjects[outputColumnNum++] = + bigTableKeyObjects[testDesc.smallTableRetainKeyColumnNums[o]]; + } + + addToOutput(testDesc, expectedTestRowMultiSet, outputObjects, + RowTestObjectsMultiSet.RowFlag.REGULAR); } break; default: @@ -458,9 +1614,10 @@ private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescript } else { - // No match. + // Big Table non-match. - if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER) { + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER || + testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { // We need to add a non-match row with nulls for small table values. @@ -468,14 +1625,74 @@ private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescript addBigTableRetained(testDesc, bigTableRowObjects, outputObjects); - final int bigTableRetainColumnNumsLength = testDesc.bigTableRetainColumnNums.length; - final int smallTableRetainValueColumnNumsLength = testDesc.smallTableRetainValueColumnNums.length; + int outputColumnNum = testDesc.bigTableRetainColumnNums.length; + + final int smallTableRetainKeyColumnNumsLength = + testDesc.smallTableRetainKeyColumnNums.length; + for (int o = 0; o < smallTableRetainKeyColumnNumsLength; o++) { + outputObjects[outputColumnNum++] = null; + } + + final int smallTableRetainValueColumnNumsLength = + testDesc.smallTableRetainValueColumnNums.length; for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) { - outputObjects[bigTableRetainColumnNumsLength + o] = null; + outputObjects[outputColumnNum++] = null; } - addToOutput(testDesc, expectedTestRowMultiSet, outputObjects); + addToOutput(testDesc, expectedTestRowMultiSet, outputObjects, + RowTestObjectsMultiSet.RowFlag.LEFT_OUTER); + } + } + } + } + + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { + + System.out.println("*BENCHMARK* ----------------------------------------------------------------------"); + System.out.println("*BENCHMARK* FULL OUTER non-match key count " + + testData.fullOuterAdditionalSmallTableKeys.size()); + + // Fill in non-match Small Table key results. + for (RowTestObjects smallTableKey : testData.fullOuterAdditionalSmallTableKeys) { + + System.out.println( + "*BENCHMARK* fullOuterAdditionalSmallTableKey " + smallTableKey.toString()); + + int smallTableKeyIndex = testData.smallTableKeyHashMap.get(smallTableKey); + + // One row per value. + ArrayList valueList = testData.smallTableValues.get(smallTableKeyIndex); + final int valueCount = valueList.size(); + for (int v = 0; v < valueCount; v++) { + Object[] outputObjects = new Object[testDesc.outputColumnNames.length]; + + // Non-match Small Table keys produce NULL Big Table columns. + final int bigTableRetainColumnNumsLength = testDesc.bigTableRetainColumnNums.length; + for (int o = 0; o < bigTableRetainColumnNumsLength; o++) { + outputObjects[o] = null; + } + + int outputColumnNum = testDesc.bigTableRetainColumnNums.length; + + // The output result may include 0, 1, or more small key columns... 
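+          // (For example, FULL OUTER ON t1.k = t2.k with Big Table keys {1, 2} and Small
+          // Table keys {2, 3} expects (1, NULL), (2, 2), and the Small-Table-only row
+          // (NULL, 3); this loop builds rows of that last kind, flagged FULL_OUTER.)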
+ Object[] smallKeyObjects = smallTableKey.getRow(); + final int smallTableRetainKeyColumnNumsLength = + testDesc.smallTableRetainKeyColumnNums.length; + for (int o = 0; o < smallTableRetainKeyColumnNumsLength; o++) { + outputObjects[outputColumnNum++] = + smallKeyObjects[testDesc.smallTableRetainKeyColumnNums[o]]; } + + Object[] valueRow = valueList.get(v).getRow(); + final int smallTableRetainValueColumnNumsLength = + testDesc.smallTableRetainValueColumnNums.length; + for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) { + outputObjects[outputColumnNum++] = + valueRow[testDesc.smallTableRetainValueColumnNums[o]]; + } + + addToOutput(testDesc, expectedTestRowMultiSet, outputObjects, + RowTestObjectsMultiSet.RowFlag.FULL_OUTER); } } } @@ -483,67 +1700,336 @@ private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescript return expectedTestRowMultiSet; } - private void executeTest(MapJoinTestDescription testDesc, MapJoinTestData testData) throws Exception { + private void generateBigAndSmallTableRowLogLines(MapJoinTestDescription testDesc, + MapJoinTestData testData) throws HiveException { + + // Generate Big Table rows log lines... + VectorExtractRow vectorExtractRow = new VectorExtractRow(); + vectorExtractRow.init(testDesc.bigTableTypeInfos); + + final int bigTableColumnCount = testDesc.bigTableTypeInfos.length; + Object[] bigTableRowObjects = new Object[bigTableColumnCount]; + + VectorBatchGenerateStream bigTableBatchStream = testData.getBigTableBatchStream(); + VectorizedRowBatch batch = testData.getBigTableBatch(); + bigTableBatchStream.reset(); + while (bigTableBatchStream.isNext()) { + batch.reset(); + bigTableBatchStream.fillNext(batch); + + final int size = testData.bigTableBatch.size; + for (int r = 0; r < size; r++) { + vectorExtractRow.extractRow(testData.bigTableBatch, r, bigTableRowObjects); + + System.out.println("*BIG TABLE* " + rowToCsvString(bigTableRowObjects)); + } + } + + // Generate Small Table rows log lines... + final int keyKeyColumnNumsLength = + testDesc.bigTableKeyColumnNums.length; + final int smallTableRetainValueLength = + testDesc.smallTableRetainValueColumnNums.length; + final int smallTableLength = keyKeyColumnNumsLength + smallTableRetainValueLength; + for (Entry entry : testData.smallTableKeyHashMap.entrySet()) { + if (smallTableRetainValueLength == 0) { + Object[] smallTableRowObjects = entry.getKey().getRow(); + System.out.println("*SMALL TABLE* " + rowToCsvString(smallTableRowObjects)); + } else { + Integer valueIndex = entry.getValue(); + ArrayList valueList = testData.smallTableValues.get(valueIndex); + final int valueCount = valueList.size(); + for (int v = 0; v < valueCount; v++) { + Object[] smallTableRowObjects = new Object[smallTableLength]; + System.arraycopy(entry.getKey().getRow(), 0, smallTableRowObjects, 0, keyKeyColumnNumsLength); + int outputColumnNum = keyKeyColumnNumsLength; + Object[] valueRow = valueList.get(v).getRow(); + for (int o = 0; o < smallTableRetainValueLength; o++) { + smallTableRowObjects[outputColumnNum++] = + valueRow[testDesc.smallTableRetainValueColumnNums[o]]; + } + System.out.println("*SMALL TABLE* " + rowToCsvString(smallTableRowObjects)); + } + } + } + } + + private void executeTest(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + + // So stack trace is self-explanatory. 
+ switch (testDesc.vectorMapJoinVariation) { + case INNER: + executeTestInner(testDesc, testData, title); + break; + case INNER_BIG_ONLY: + executeTestInnerBigOnly(testDesc, testData, title); + break; + case LEFT_SEMI: + executeTestLeftSemi(testDesc, testData, title); + break; + case OUTER: + executeTestOuter(testDesc, testData, title); + break; + case FULL_OUTER: + executeTestFullOuter(testDesc, testData, title); + break; + default: + throw new RuntimeException("Unexpected Vector MapJoin variation " + + testDesc.vectorMapJoinVariation); + } + } + + private void executeTestInner(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + doExecuteTest(testDesc, testData, title); + } + + private void executeTestInnerBigOnly(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + doExecuteTest(testDesc, testData, title); + } + + private void executeTestLeftSemi(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + doExecuteTest(testDesc, testData, title); + } + + private void executeTestOuter(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + doExecuteTest(testDesc, testData, title); + } + + private void executeTestFullOuter(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + doExecuteTest(testDesc, testData, title); + } + + private void doExecuteTest(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { RowTestObjectsMultiSet expectedTestRowMultiSet = createExpectedTestRowMultiSet(testDesc, testData); - // UNDONE: Inner count - System.out.println("*BENCHMARK* expectedTestRowMultiSet rowCount " + expectedTestRowMultiSet.getRowCount() + - " totalCount " + expectedTestRowMultiSet.getTotalCount()); + generateBigAndSmallTableRowLogLines(testDesc, testData); + + System.out.println("*BENCHMARK* expectedTestRowMultiSet " + + " totalKeyCount " + expectedTestRowMultiSet.getTotalKeyCount() + + " totalValueCount " + expectedTestRowMultiSet.getTotalValueCount()); // Execute all implementation variations. for (MapJoinTestImplementation mapJoinImplementation : MapJoinTestImplementation.values()) { - executeTestImplementation(mapJoinImplementation, testDesc, testData, - expectedTestRowMultiSet); + + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER && + mapJoinImplementation == MapJoinTestImplementation.ROW_MODE_HASH_MAP) { + + // Key match tracking not supported in plain Java HashMap. 
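+        // (FULL OUTER relies on the hash table tracking which keys matched so the
+        // non-matched Small Table keys can be emitted afterwards; the plain Java HashMap
+        // path has no MatchTracker, so it is skipped rather than produce partial results.)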
+ continue; + } + switch (mapJoinImplementation) { + case ROW_MODE_HASH_MAP: + executeRowModeHashMap( + testDesc, testData, + expectedTestRowMultiSet, + title); + break; + case ROW_MODE_OPTIMIZED: + executeRowModeOptimized( + testDesc, testData, + expectedTestRowMultiSet, + title); + break; + case VECTOR_PASS_THROUGH: + executeVectorPassThrough( + testDesc, testData, + expectedTestRowMultiSet, + title); + break; + case NATIVE_VECTOR_OPTIMIZED: + executeNativeVectorOptimized( + testDesc, testData, + expectedTestRowMultiSet, + title); + break; + case NATIVE_VECTOR_FAST: + executeNativeVectorFast( + testDesc, testData, + expectedTestRowMultiSet, + title); + break; + default: + throw new RuntimeException( + "Unexpected vector map join test variation"); + } } } - private boolean isVectorOutput(MapJoinTestImplementation mapJoinImplementation) { - return - (mapJoinImplementation != MapJoinTestImplementation.ROW_MODE_HASH_MAP && - mapJoinImplementation != MapJoinTestImplementation.ROW_MODE_OPTIMIZED); + private void executeRowModeHashMap( + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) + throws Exception { + executeTestImplementation( + MapJoinTestImplementation.ROW_MODE_HASH_MAP, + testDesc, testData, + expectedTestRowMultiSet, + title); + } + + private void executeRowModeOptimized( + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) + throws Exception { + executeTestImplementation( + MapJoinTestImplementation.ROW_MODE_OPTIMIZED, + testDesc, testData, + expectedTestRowMultiSet, + title); + } + + private void executeVectorPassThrough( + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) + throws Exception { + executeTestImplementation( + MapJoinTestImplementation.VECTOR_PASS_THROUGH, + testDesc, testData, + expectedTestRowMultiSet, + title); + } + + private void executeNativeVectorOptimized( + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) + throws Exception { + executeTestImplementation( + MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED, + testDesc, testData, + expectedTestRowMultiSet, + title); + } + + private void executeNativeVectorFast( + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) + throws Exception { + executeTestImplementation( + MapJoinTestImplementation.NATIVE_VECTOR_FAST, + testDesc, testData, + expectedTestRowMultiSet, + title); } private void executeTestImplementation( MapJoinTestImplementation mapJoinImplementation, - MapJoinTestDescription testDesc, MapJoinTestData testData, RowTestObjectsMultiSet expectedTestRowMultiSet) + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) throws Exception { - System.out.println("*BENCHMARK* Starting " + mapJoinImplementation + " test"); + System.out.println("*BENCHMARK* Starting implementation " + mapJoinImplementation + + " variation " + testDesc.vectorMapJoinVariation + + " title " + title); // UNDONE: Parameterize for implementation variation? 
MapJoinDesc mapJoinDesc = MapJoinTestConfig.createMapJoinDesc(testDesc); - final boolean isVectorOutput = isVectorOutput(mapJoinImplementation); + final boolean isVectorOutput = MapJoinTestConfig.isVectorOutput(mapJoinImplementation); RowTestObjectsMultiSet outputTestRowMultiSet = new RowTestObjectsMultiSet(); - Operator testCollectorOperator = - (!isVectorOutput ? - new TestMultiSetCollectorOperator( - testDesc.outputObjectInspectors, outputTestRowMultiSet) : - new TestMultiSetVectorCollectorOperator( - testDesc.outputTypeInfos, testDesc.outputObjectInspectors, outputTestRowMultiSet)); - - MapJoinOperator operator = + CreateMapJoinResult result = MapJoinTestConfig.createMapJoinImplementation( - mapJoinImplementation, testDesc, testCollectorOperator, testData, mapJoinDesc); + mapJoinImplementation, testDesc, testData, mapJoinDesc); + MapJoinOperator mapJoinOperator = result.mapJoinOperator; + MapJoinTableContainer mapJoinTableContainer = result.mapJoinTableContainer; + MapJoinTableContainerSerDe mapJoinTableContainerSerDe = result.mapJoinTableContainerSerDe; + + CountCollectorTestOperator testCollectorOperator; + if (!isVectorOutput) { + testCollectorOperator = + new TestMultiSetCollectorOperator( + testDesc.outputObjectInspectors, outputTestRowMultiSet); + } else { + VectorizationContext vOutContext = + ((VectorizationContextRegion) mapJoinOperator).getOutputVectorizationContext(); + testCollectorOperator = + new TestMultiSetVectorCollectorOperator( + ArrayUtils.toPrimitive(vOutContext.getProjectedColumns().toArray(new Integer[0])), + testDesc.outputTypeInfos, testDesc.outputObjectInspectors, outputTestRowMultiSet); + } + + MapJoinTestConfig.connectOperators(mapJoinOperator, testCollectorOperator); + + CountCollectorTestOperator interceptTestCollectorOperator = null; + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER && + !mapJoinDesc.isDynamicPartitionHashJoin()) { + + if (mapJoinImplementation == MapJoinTestImplementation.ROW_MODE_HASH_MAP) { + + // Not supported. + return; + } + + // Wire in FULL OUTER Intercept. + interceptTestCollectorOperator = + MapJoinTestConfig.addFullOuterIntercept( + mapJoinImplementation, testDesc, outputTestRowMultiSet, testData, + mapJoinOperator, mapJoinTableContainer, mapJoinTableContainerSerDe); + } else { + + // Invoke initializeOp methods. + mapJoinOperator.initialize( + testDesc.hiveConf, testDesc.inputObjectInspectors); + + // Fixup the mapJoinTables. 
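+      // (Position 1 receives the Small Table hash table container; the Big Table rows are
+      // streamed through the operator below via driveBigTableData / driveVectorBigTableData.)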
+ mapJoinOperator.setTestMapJoinTableContainer( + 1, mapJoinTableContainer, mapJoinTableContainerSerDe); + } if (!isVectorOutput) { - MapJoinTestData.driveBigTableData(testDesc, testData, operator); + MapJoinTestData.driveBigTableData(testDesc, testData, mapJoinOperator); } else { - MapJoinTestData.driveVectorBigTableData(testDesc, testData, operator); + MapJoinTestData.driveVectorBigTableData(testDesc, testData, mapJoinOperator); + } + + if (!testCollectorOperator.getIsClosed()) { + Assert.fail("collector operator not closed"); + } + if (testCollectorOperator.getIsAborted()) { + Assert.fail("collector operator aborted"); + } + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER && + !mapJoinDesc.isDynamicPartitionHashJoin()) { + if (!interceptTestCollectorOperator.getIsClosed()) { + Assert.fail("intercept collector operator not closed"); + } + if (interceptTestCollectorOperator.getIsAborted()) { + Assert.fail("intercept collector operator aborted"); + } } System.out.println("*BENCHMARK* executeTestImplementation row count " + - ((CountCollectorTestOperator) testCollectorOperator).getRowCount()); + testCollectorOperator.getRowCount()); // Verify the output! - if (!expectedTestRowMultiSet.verify(outputTestRowMultiSet)) { - System.out.println("*BENCHMARK* verify failed for " + mapJoinImplementation); + String option = ""; + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { + option = " mapJoinPlanVariation " + testDesc.mapJoinPlanVariation.name(); + } + if (!expectedTestRowMultiSet.verify(outputTestRowMultiSet, "expected", "actual")) { + System.out.println("*BENCHMARK* " + title + " verify failed" + + " for implementation " + mapJoinImplementation + + " variation " + testDesc.vectorMapJoinVariation + option); + expectedTestRowMultiSet.displayDifferences(outputTestRowMultiSet, "expected", "actual"); } else { - System.out.println("*BENCHMARK* verify succeeded for " + mapJoinImplementation); + System.out.println("*BENCHMARK* " + title + " verify succeeded " + + " for implementation " + mapJoinImplementation + + " variation " + testDesc.vectorMapJoinVariation + option); } } } \ No newline at end of file diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java index 09dcb83..3ce061d 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java @@ -30,11 +30,15 @@ import junit.framework.TestCase; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.WritableComparator; @@ -197,6 +201,20 @@ public long getKey(int index) { return array[index].getValues(); } + private 
void verifyOne(VectorMapJoinFastLongHashMap map, int index, MatchTracker matchTracker) { + FastLongHashMapElement element = array[index]; + long longKey = element.getKey(); + List values = element.getValues(); + + VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); + JoinUtil.JoinResult joinResult = map.lookup(longKey, hashMapResult, matchTracker); + if (joinResult != JoinUtil.JoinResult.MATCH) { + assertTrue(false); + } + + verifyHashMapValues(hashMapResult, values); + } + public void verify(VectorMapJoinFastLongHashMap map) { int mapSize = map.size(); if (mapSize != count) { @@ -204,18 +222,77 @@ public void verify(VectorMapJoinFastLongHashMap map) { } for (int index = 0; index < count; index++) { + verifyOne(map, index, null); + } + } + + private int findKeyInArray(long searchLong) { + + // Brute force search. + for (int index = 0; index < count; index++) { FastLongHashMapElement element = array[index]; - long key = element.getKey(); - List values = element.getValues(); + long longKey = element.getKey(); + if (longKey == searchLong) { + return index; + } + } + return -1; + } - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, hashMapResult); - if (joinResult != JoinUtil.JoinResult.MATCH) { - assertTrue(false); + // We assume there have been no reads/lookups before this call. + // And, keys are *UNIQUE*. + public void verifyNonMatched(VectorMapJoinFastLongHashMap map, Random random) + throws HiveException { + int mapSize = map.size(); + if (mapSize != count) { + TestCase.fail("map.size() does not match expected count"); + } + + MatchTracker matchTracker = map.createMatchTracker(); + boolean nonMatched[] = new boolean[mapSize]; + int nonMatchedCount = 0; + for (int index = 0; index < count; index++) { + nonMatched[index] = random.nextBoolean(); + if (!nonMatched[index]) { + verifyOne(map, index, matchTracker); + } else { + nonMatchedCount++; } + } + + boolean returnedNonMatched[] = new boolean[mapSize]; + int returnedNonMatchedCount = 0; + + VectorMapJoinNonMatchedIterator nonMatchedIterator = + map.createNonMatchedIterator(matchTracker); + nonMatchedIterator.init(); + while (nonMatchedIterator.findNextNonMatched()) { + boolean isNull = !nonMatchedIterator.readNonMatchedLongKey(); + if (isNull) { + TestCase.fail("NULL key found in expected keys"); + } + long longKey = nonMatchedIterator.getNonMatchedLongKey(); + int index = findKeyInArray(longKey); + if (index == -1) { + TestCase.fail("non-matched key not found in expected keys"); + } + if (!nonMatched[index]) { + TestCase.fail("non-matched key not one of the expected non-matched keys"); + } + if (returnedNonMatched[index]) { + TestCase.fail("non-matched key already returned"); + } + returnedNonMatched[index] = true; + returnedNonMatchedCount++; + VectorMapJoinHashMapResult hashMapResult = nonMatchedIterator.getNonMatchedHashMapResult(); + FastLongHashMapElement element = array[index]; + List values = element.getValues(); verifyHashMapValues(hashMapResult, values); } + if (nonMatchedCount != returnedNonMatchedCount) { + TestCase.fail("non-matched key count mismatch"); + } } } @@ -247,6 +324,11 @@ public int getValueCount() { public void addValue(byte[] value) { values.add(value); } + + @Override + public String toString() { + return "Key length " + key.length + ", value count " + values.size(); + } } /* @@ -310,6 +392,21 @@ public void add(byte[] key, byte[] value) { return array[index].getValues(); } + private void 
verifyOne(VectorMapJoinFastBytesHashMap map, int index, + MatchTracker matchTracker) { + FastBytesHashMapElement element = array[index]; + byte[] key = element.getKey(); + List values = element.getValues(); + + VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); + JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult, matchTracker); + if (joinResult != JoinUtil.JoinResult.MATCH) { + assertTrue(false); + } + + verifyHashMapValues(hashMapResult, values); + } + public void verify(VectorMapJoinFastBytesHashMap map) { int mapSize = map.size(); if (mapSize != count) { @@ -317,18 +414,82 @@ public void verify(VectorMapJoinFastBytesHashMap map) { } for (int index = 0; index < count; index++) { + verifyOne(map, index, null); + } + } + + private int findKeyInArray(byte[] searchKeyBytes, int searchKeyOffset, int searchKeyLength) { + + // Brute force search. + for (int index = 0; index < count; index++) { FastBytesHashMapElement element = array[index]; - byte[] key = element.getKey(); - List values = element.getValues(); + byte[] keyBytes = element.getKey(); + if (keyBytes.length == searchKeyLength && + StringExpr.equal( + keyBytes, 0, keyBytes.length, + searchKeyBytes, searchKeyOffset, searchKeyLength)) { + return index; + } + } + return -1; + } - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult); - if (joinResult != JoinUtil.JoinResult.MATCH) { - assertTrue(false); + // We assume there have been no reads/lookups before this call. + // And, keys are *UNIQUE*. + public void verifyNonMatched(VectorMapJoinFastBytesHashMap map, Random random) + throws HiveException { + int mapSize = map.size(); + if (mapSize != count) { + TestCase.fail("map.size() does not match expected count"); + } + + MatchTracker matchTracker = map.createMatchTracker(); + boolean nonMatched[] = new boolean[mapSize]; + int nonMatchedCount = 0; + for (int index = 0; index < count; index++) { + nonMatched[index] = random.nextBoolean(); + if (!nonMatched[index]) { + verifyOne(map, index, matchTracker); + } else { + nonMatchedCount++; } + } + + boolean returnedNonMatched[] = new boolean[mapSize]; + int returnedNonMatchedCount = 0; + VectorMapJoinNonMatchedIterator nonMatchedIterator = + map.createNonMatchedIterator(matchTracker); + nonMatchedIterator.init(); + + while (nonMatchedIterator.findNextNonMatched()) { + boolean isNull = !nonMatchedIterator.readNonMatchedBytesKey(); + if (isNull) { + TestCase.fail("NULL key found in expected keys"); + } + byte[] keyBytes = nonMatchedIterator.getNonMatchedBytes(); + int keyOffset = nonMatchedIterator.getNonMatchedBytesOffset(); + int keyLength = nonMatchedIterator.getNonMatchedBytesLength(); + int index = findKeyInArray(keyBytes, keyOffset, keyLength); + if (index == -1) { + TestCase.fail("non-matched key not found in expected keys"); + } + if (!nonMatched[index]) { + TestCase.fail("non-matched key not one of the expected non-matched keys"); + } + if (returnedNonMatched[index]) { + TestCase.fail("non-matched key already returned"); + } + returnedNonMatched[index] = true; + returnedNonMatchedCount++; + VectorMapJoinHashMapResult hashMapResult = nonMatchedIterator.getNonMatchedHashMapResult(); + FastBytesHashMapElement element = array[index]; + List values = element.getValues(); verifyHashMapValues(hashMapResult, values); } + if (nonMatchedCount != returnedNonMatchedCount) { + TestCase.fail("non-matched key count mismatch"); + } } } diff --git
ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java new file mode 100644 index 0000000..6833553 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java @@ -0,0 +1,171 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastBytesHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.junit.Test; + +/* + * A multi-key value hash map optimized for vector map join. + * + * The key is uninterpreted bytes. + */ +public class TestVectorMapJoinFastBytesHashMapNonMatched extends CommonFastHashTable { + + @Test + public void testOneKey() throws Exception { + random = new Random(82733); + + VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap( + false, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + + // Second value. + value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + map.testPutRow(key, value); + verifyTable.add(key, value); + + // Third value. + value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + map.testPutRow(key, value); + verifyTable.add(key, value); + + verifyTable.verifyNonMatched(map, random); + } + + @Test + public void testMultipleKeysSingleValue() throws Exception { + random = new Random(29383); + + VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap( + false, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + int keyCount = 100 + random.nextInt(1000); + for (int i = 0; i < keyCount; i++) { + byte[] key; + while (true) { + key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test.
+ break; + } + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + } + + verifyTable.verifyNonMatched(map, random); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastMultiKeyHashMap map, VerifyFastBytesHashMap verifyTable) + throws HiveException, IOException { + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable, MAX_KEY_LENGTH, -1); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastMultiKeyHashMap map, VerifyFastBytesHashMap verifyTable, + int maxKeyLength, int fixedValueLength) + throws HiveException, IOException { + for (int i = 0; i < keyCount; i++) { + byte[] value; + if (fixedValueLength == -1) { + value = new byte[generateLargeCount() - 1]; + } else { + value = new byte[fixedValueLength]; + } + random.nextBytes(value); + + // Add a new key or add a value to an existing key? + if (random.nextBoolean() || verifyTable.getCount() == 0) { + byte[] key; + while (true) { + key = new byte[random.nextInt(maxKeyLength)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key, value); + verifyTable.add(key, value); + } else { + byte[] randomExistingKey = verifyTable.addRandomExisting(value, random); + map.testPutRow(randomExistingKey, value); + } + } + + verifyTable.verifyNonMatched(map, random); + } + + @Test + public void testMultipleKeysMultipleValue() throws Exception { + random = new Random(9332); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap( + false, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + int keyCount = 100; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } + + @Test + public void testReallyBig() throws Exception { + random = new Random(42662); + + // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap( + false, LARGE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + int keyCount = 100; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java index cbd77d1..fb8be91 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java @@ -37,7 +37,7 @@ public void testOneKey() throws Exception { VectorMapJoinFastMultiKeyHashSet map = new VectorMapJoinFastMultiKeyHashSet( - false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -65,7 +65,7 @@ public void testMultipleKeysSingleValue() throws Exception { VectorMapJoinFastMultiKeyHashSet map = new VectorMapJoinFastMultiKeyHashSet( - false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -91,7 +91,7 @@ public void testGetNonExistent() throws Exception { VectorMapJoinFastMultiKeyHashSet map = new VectorMapJoinFastMultiKeyHashSet( - false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -125,7 +125,7 @@ public void testFullMap() throws Exception { // Make sure the map does not expand; should be able to find space. VectorMapJoinFastMultiKeyHashSet map = - new VectorMapJoinFastMultiKeyHashSet(false,CAPACITY, 1f, WB_SIZE, -1); + new VectorMapJoinFastMultiKeyHashSet(CAPACITY, 1f, WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -167,7 +167,7 @@ public void testExpand() throws Exception { // Start with capacity 1; make sure we expand on every put. VectorMapJoinFastMultiKeyHashSet map = - new VectorMapJoinFastMultiKeyHashSet(false,1, 0.0000001f, WB_SIZE, -1); + new VectorMapJoinFastMultiKeyHashSet(1, 0.0000001f, WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -227,7 +227,7 @@ public void testMultipleKeysMultipleValue() throws Exception { // Use a large capacity that doesn't require expansion, yet. VectorMapJoinFastMultiKeyHashSet map = new VectorMapJoinFastMultiKeyHashSet( - false,LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); + LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -242,7 +242,7 @@ public void testLargeAndExpand() throws Exception { // Use a large capacity that doesn't require expansion, yet.
VectorMapJoinFastMultiKeyHashSet map = new VectorMapJoinFastMultiKeyHashSet( - false,MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); + MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java new file mode 100644 index 0000000..8e53501 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java @@ -0,0 +1,198 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastLongHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastLongHashMap; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.junit.Test; + +import static org.junit.Assert.*; + +public class TestVectorMapJoinFastLongHashMapNonMatched extends CommonFastHashTable { + + @Test + public void testOneKey() throws Exception { + random = new Random(33221); + + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + long key = random.nextLong(); + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + + // Second value. + value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + map.testPutRow(key, value); + verifyTable.add(key, value); + + // Third value. 
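+    // (Several values under one key exercise the hash map's multi-value list, which
+    // verifyNonMatched later checks via verifyHashMapValues.)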
+ value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + map.testPutRow(key, value); + verifyTable.add(key, value); + + verifyTable.verifyNonMatched(map, random); + } + + @Test + public void testMultipleKeysSingleValue() throws Exception { + random = new Random(900); + + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + int keyCount = 100 + random.nextInt(1000); + for (int i = 0; i < keyCount; i++) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + } + + verifyTable.verifyNonMatched(map, random); + } + + @Test + public void testExpand() throws Exception { + random = new Random(5227); + + // Start with capacity 1; make sure we expand on every put. + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE, -1); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + for (int i = 0; i < 18; ++i) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + } + + verifyTable.verifyNonMatched(map, random); + // assertEquals(1 << 18, map.getCapacity()); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastLongHashMap map, VerifyFastLongHashMap verifyTable) + throws HiveException, IOException { + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable, -1); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastLongHashMap map, VerifyFastLongHashMap verifyTable, int fixedValueLength) + throws HiveException, IOException { + for (int i = 0; i < keyCount; i++) { + byte[] value; + if (fixedValueLength == -1) { + value = new byte[generateLargeCount() - 1]; + } else { + value = new byte[fixedValueLength]; + } + random.nextBytes(value); + + // Add a new key or add a value to an existing key? + if (random.nextBoolean() || verifyTable.getCount() == 0) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key, value); + verifyTable.add(key, value); + } else { + long randomExistingKey = verifyTable.addRandomExisting(value, random); + map.testPutRow(randomExistingKey, value); + } + } + + verifyTable.verifyNonMatched(map, random); + } + + @Test + public void testMultipleKeysMultipleValue() throws Exception { + random = new Random(8); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + int keyCount = 100; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } + + @Test + public void testLargeAndExpand() throws Exception { + random = new Random(20); + + // Use a large capacity that doesn't require expansion, yet. 
+ VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + int keyCount = 100; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java index bbb5da0..f64d180 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java @@ -39,7 +39,7 @@ public void testOneKey() throws Exception { VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); @@ -66,7 +66,7 @@ public void testMultipleKeysSingleValue() throws Exception { VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); @@ -94,7 +94,7 @@ public void testGetNonExistent() throws Exception { VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); @@ -126,7 +126,7 @@ public void testFullMap() throws Exception { // Make sure the map does not expand; should be able to find space. VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE, -1); + false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); @@ -167,7 +167,7 @@ public void testExpand() throws Exception { // Start with capacity 1; make sure we expand on every put. VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE, -1); + false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); @@ -225,7 +225,7 @@ public void testMultipleKeysMultipleValue() throws Exception { // Use a large capacity that doesn't require expansion, yet. VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); + false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); @@ -240,7 +240,7 @@ public void testLargeAndExpand() throws Exception { // Use a large capacity that doesn't require expansion, yet. 
VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); + false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java index 4412425..ff993aa 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java @@ -600,8 +600,6 @@ public static Object deserializeReadComplexType(DeserializeRead deserializeRead, return getComplexField(deserializeRead, typeInfo); } - static int fake = 0; - private static Object getComplexField(DeserializeRead deserializeRead, TypeInfo typeInfo) throws IOException { switch (typeInfo.getCategory()) { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java index 793a676..ab1a829 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java @@ -23,8 +23,10 @@ import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; @@ -109,15 +111,25 @@ public static GenerateCategory generateCategoryFromPrimitiveCategory(PrimitiveCa } private GenerateCategory category; + private boolean allowNulls; public GenerateType(GenerateCategory category) { this.category = category; } + public GenerateType(GenerateCategory category, boolean allowNulls) { + this.category = category; + this.allowNulls = allowNulls; + } + public GenerateCategory getCategory() { return category; } + public boolean getAllowNulls() { + return allowNulls; + } + /* * BOOLEAN .. LONG: Min and max. 
*/ @@ -189,16 +201,24 @@ public void assignColumnVectors(VectorizedRowBatch batch, int columnNum, break; case STRING: + case CHAR: + case VARCHAR: + case BINARY: colVector = new BytesColumnVector(); break; - // UNDONE - case DATE: case TIMESTAMP: - case BINARY: + colVector = new TimestampColumnVector(); + break; + case DECIMAL: - case VARCHAR: - case CHAR: + colVector = new DecimalColumnVector(38, 18); + break; + + // UNDONE + case DATE: + + case LIST: case MAP: case STRUCT: diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorColumnGroupGenerator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorColumnGroupGenerator.java index 1064b19..22a1cd8 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorColumnGroupGenerator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorColumnGroupGenerator.java @@ -22,15 +22,19 @@ import java.util.Arrays; import java.util.Random; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.common.type.RandomTypeUtil; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.Text; @@ -39,6 +43,7 @@ private GenerateType[] generateTypes; private int[] columnNums; private Object[] arrays; + private boolean[][] isNullArrays; public VectorColumnGroupGenerator(int columnNum, GenerateType generateType) { columnNums = new int[] {columnNum}; @@ -61,6 +66,7 @@ public VectorColumnGroupGenerator(int startColumnNum, GenerateType[] generateTyp private void allocateArrays(int size) { arrays = new Object[generateTypes.length]; + isNullArrays = new boolean[generateTypes.length][]; for (int i = 0; i < generateTypes.length; i++) { GenerateType generateType = generateTypes[i]; GenerateCategory category = generateType.getCategory(); @@ -90,24 +96,34 @@ private void allocateArrays(int size) { case STRING: array = new String[size]; break; + case BINARY: + array = new byte[size][]; + break; case TIMESTAMP: array = new Timestamp[size]; break; + case CHAR: + array = new HiveChar[size]; + break; + case VARCHAR: + array = new HiveVarchar[size]; + break; + case DECIMAL: + array = new HiveDecimalWritable[size]; + break; // UNDONE case DATE: - case BINARY: - case DECIMAL: - case VARCHAR: - case CHAR: case LIST: case MAP: case STRUCT: case UNION: default: + throw new RuntimeException("Unexpected generate category " + category); } arrays[i] = array; + isNullArrays[i] = new boolean[size]; } } @@ -141,16 +157,24 @@ public void clearColumnValueArrays() { case STRING: Arrays.fill(((String[]) array), null); break; + case BINARY: + Arrays.fill(((byte[][]) array), null); + break; case TIMESTAMP: Arrays.fill(((Timestamp[]) array), null); break; + case CHAR: + Arrays.fill(((HiveChar[]) array), 
null); + break; + case VARCHAR: + Arrays.fill(((HiveVarchar[]) array), null); + break; + case DECIMAL: + Arrays.fill(((HiveDecimalWritable[]) array), null); + break; // UNDONE case DATE: - case BINARY: - case DECIMAL: - case VARCHAR: - case CHAR: case LIST: case MAP: @@ -170,6 +194,11 @@ public void generateRowValues(int rowIndex, Random random) { private void generateRowColumnValue(int rowIndex, int columnIndex, Random random) { GenerateType generateType = generateTypes[columnIndex]; GenerateCategory category = generateType.getCategory(); + boolean allowNulls = generateType.getAllowNulls(); + if (allowNulls && random.nextInt(100) < 5) { + isNullArrays[columnIndex][rowIndex] = true; + return; + } Object array = arrays[columnIndex]; switch (category) { case BOOLEAN: @@ -230,6 +259,13 @@ private void generateRowColumnValue(int rowIndex, int columnIndex, Random random } break; + case BINARY: + { + byte[] value = RandomTypeUtil.getRandBinary(random, 10); + ((byte[][]) array)[rowIndex] = value; + } + break; + case TIMESTAMP: { Timestamp value = RandomTypeUtil.getRandTimestamp(random); @@ -237,14 +273,36 @@ private void generateRowColumnValue(int rowIndex, int columnIndex, Random random } break; + case CHAR: + { + // UNDONE: Use CharTypeInfo.maxLength + HiveChar value = + new HiveChar(RandomTypeUtil.getRandString(random), 10); + ((HiveChar[]) array)[rowIndex] = value; + } + break; + + case VARCHAR: + { + // UNDONE: Use VarcharTypeInfo.maxLength + HiveVarchar value = + new HiveVarchar(RandomTypeUtil.getRandString(random), 10); + ((HiveVarchar[]) array)[rowIndex] = value; + } + break; + + case DECIMAL: + { + HiveDecimalWritable value = + new HiveDecimalWritable(RandomTypeUtil.getRandHiveDecimal(random)); + ((HiveDecimalWritable[]) array)[rowIndex] = value; + } + break; + // UNDONE case DATE: // UNDONE: Needed to longTest? 
- case BINARY: - case DECIMAL: - case VARCHAR: - case CHAR: case LIST: case MAP: @@ -263,7 +321,15 @@ public void fillDownRowValues(int rowIndex, int seriesCount, Random random) { private void fillDownRowColumnValue(int rowIndex, int columnIndex, int seriesCount, Random random) { GenerateType generateType = generateTypes[columnIndex]; GenerateCategory category = generateType.getCategory(); + boolean allowNulls = generateType.getAllowNulls(); Object array = arrays[columnIndex]; + boolean[] isNull = isNullArrays[columnIndex]; + if (allowNulls && isNull[rowIndex]) { + for (int i = 1; i < seriesCount; i++) { + isNull[rowIndex + i] = true; + } + return; + } switch (category) { case BOOLEAN: { @@ -337,6 +403,15 @@ private void fillDownRowColumnValue(int rowIndex, int columnIndex, int seriesCou } } break; + case BINARY: + { + byte[][] byteArrayArray = ((byte[][]) array); + byte[] value = byteArrayArray[rowIndex]; + for (int i = 1; i < seriesCount; i++) { + byteArrayArray[rowIndex + i] = value; + } + } + break; case TIMESTAMP: { Timestamp[] timestampArray = ((Timestamp[]) array); @@ -346,15 +421,37 @@ private void fillDownRowColumnValue(int rowIndex, int columnIndex, int seriesCou } } break; + case CHAR: + { + HiveChar[] hiveCharArray = ((HiveChar[]) array); + HiveChar value = hiveCharArray[rowIndex]; + for (int i = 1; i < seriesCount; i++) { + hiveCharArray[rowIndex + i] = value; + } + } + break; + case VARCHAR: + { + HiveVarchar[] hiveVarcharArray = ((HiveVarchar[]) array); + HiveVarchar value = hiveVarcharArray[rowIndex]; + for (int i = 1; i < seriesCount; i++) { + hiveVarcharArray[rowIndex + i] = value; + } + } + break; + case DECIMAL: + { + HiveDecimalWritable[] hiveDecimalWritableArray = ((HiveDecimalWritable[]) array); + HiveDecimalWritable value = hiveDecimalWritableArray[rowIndex]; + for (int i = 1; i < seriesCount; i++) { + hiveDecimalWritableArray[rowIndex + i] = value; + } + } + break; // UNDONE case DATE: - case BINARY: - case DECIMAL: - case VARCHAR: - case CHAR: - case LIST: case MAP: case STRUCT: @@ -389,6 +486,16 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde GenerateType generateType = generateTypes[logicalColumnIndex]; GenerateCategory category = generateType.getCategory(); + boolean allowNulls = generateType.getAllowNulls(); + boolean[] isNull = isNullArrays[logicalColumnIndex]; + if (allowNulls) { + for (int i = 0; i < size; i++) { + if (isNull[i]) { + colVector.isNull[i] = true; + colVector.noNulls = false; + } + } + } Object array = arrays[logicalColumnIndex]; switch (category) { case BOOLEAN: @@ -396,7 +503,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde boolean[] booleanArray = ((boolean[]) array); long[] vector = ((LongColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = (booleanArray[i] ? 1 : 0); + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = (booleanArray[i] ? 
1 : 0); + } } } break; @@ -405,7 +516,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde byte[] byteArray = ((byte[]) array); long[] vector = ((LongColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = byteArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = byteArray[i]; + } } } break; @@ -414,7 +529,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde short[] shortArray = ((short[]) array); long[] vector = ((LongColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = shortArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = shortArray[i]; + } } } break; @@ -423,7 +542,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde int[] intArray = ((int[]) array); long[] vector = ((LongColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = intArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = intArray[i]; + } } } break; @@ -432,7 +555,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde long[] longArray = ((long[]) array); long[] vector = ((LongColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = longArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = longArray[i]; + } } } break; @@ -441,7 +568,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde float[] floatArray = ((float[]) array); double[] vector = ((DoubleColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = floatArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = floatArray[i]; + } } } break; @@ -450,7 +581,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde double[] doubleArray = ((double[]) array); double[] vector = ((DoubleColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = doubleArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = doubleArray[i]; + } } } break; @@ -459,8 +594,22 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde String[] stringArray = ((String[]) array); BytesColumnVector bytesColVec = ((BytesColumnVector) colVector); for (int i = 0; i < size; i++) { - byte[] bytes = stringArray[i].getBytes(); - bytesColVec.setVal(i, bytes); + if (!isNull[i]) { + byte[] bytes = stringArray[i].getBytes(); + bytesColVec.setVal(i, bytes); + } + } + } + break; + case BINARY: + { + byte[][] byteArrayArray = ((byte[][]) array); + BytesColumnVector bytesColVec = ((BytesColumnVector) colVector); + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + byte[] bytes = byteArrayArray[i]; + bytesColVec.setVal(i, bytes); + } } } break; @@ -469,8 +618,46 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde Timestamp[] timestampArray = ((Timestamp[]) array); TimestampColumnVector timestampColVec = ((TimestampColumnVector) colVector); for (int i = 0; i < size; i++) { - Timestamp timestamp = timestampArray[i]; - timestampColVec.set(i, timestamp); + if (!isNull[i]) { + Timestamp timestamp = timestampArray[i]; + timestampColVec.set(i, timestamp); + } + } + } + break; + case CHAR: + { + HiveChar[] hiveCharArray = ((HiveChar[]) array); + BytesColumnVector bytesColVec = ((BytesColumnVector) colVector); + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + byte[] bytes = hiveCharArray[i].getValue().getBytes(); + 
bytesColVec.setVal(i, bytes); + } + } + } + break; + case VARCHAR: + { + HiveVarchar[] hiveVarcharArray = ((HiveVarchar[]) array); + BytesColumnVector bytesColVec = ((BytesColumnVector) colVector); + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + byte[] bytes = hiveVarcharArray[i].getValue().getBytes(); + bytesColVec.setVal(i, bytes); + } + } + } + break; + case DECIMAL: + { + HiveDecimalWritable[] hiveDecimalWritableArray = ((HiveDecimalWritable[]) array); + DecimalColumnVector decimalColVec = ((DecimalColumnVector) colVector); + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + HiveDecimalWritable decWritable = hiveDecimalWritableArray[i]; + decimalColVec.set(i, decWritable); + } } } break; @@ -479,16 +666,12 @@ case DATE: - case BINARY: - case DECIMAL: - case VARCHAR: - case CHAR: - case LIST: case MAP: case STRUCT: case UNION: default: + throw new RuntimeException("Unexpected generate category " + category); } } } \ No newline at end of file diff --git ql/src/test/queries/clientpositive/auto_join_filters.q ql/src/test/queries/clientpositive/auto_join_filters.q index a44ffb3..9198cad 100644 --- ql/src/test/queries/clientpositive/auto_join_filters.q +++ ql/src/test/queries/clientpositive/auto_join_filters.q @@ -7,7 +7,13 @@ LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=false; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.merge.nway.joins=true; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; @@ -24,10 +30,19 @@ SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN m SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND
a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=false; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.merge.nway.joins=true; SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value); SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value); @@ -51,7 +66,12 @@ SET hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = 
a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=false; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.merge.nway.joins=true; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; @@ -68,10 +88,19 @@ SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN m SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=false; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value 
AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.merge.nway.joins=true; SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value); SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value); diff --git ql/src/test/queries/clientpositive/auto_join_nulls.q ql/src/test/queries/clientpositive/auto_join_nulls.q index 279fd32..b59f30e 100644 --- ql/src/test/queries/clientpositive/auto_join_nulls.q +++ ql/src/test/queries/clientpositive/auto_join_nulls.q @@ -19,10 +19,19 @@ SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN m SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value; +SET hive.mapjoin.full.outer=false; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key; +SET hive.merge.nway.joins=true; SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value); SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value); diff --git ql/src/test/queries/clientpositive/correlationoptimizer1.q ql/src/test/queries/clientpositive/correlationoptimizer1.q index d16904e..3f90077 100644 --- ql/src/test/queries/clientpositive/correlationoptimizer1.q +++ ql/src/test/queries/clientpositive/correlationoptimizer1.q @@ -214,6 +214,7 @@ set hive.optimize.correlation=false; -- they share the same key. Because those keys with a null value are not grouped -- in the output of the Full Outer Join, we cannot use a single MR to execute -- these two operators. 
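-- For example (illustrative values, not the actual test data): with src1.key = {311, NULL}
-- and src.key = {311, 66}, the FULL OUTER JOIN emits x.key values 311, NULL (the null
-- src1 key), and NULL again (the NULL-extension of the non-matched src row 66). The two
-- NULL rows were shuffled under different join keys, so they can land on different
-- reducers, and GROUP BY x.key needs its own shuffle in a second job.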
+SET hive.mapjoin.full.outer=false; EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT x.key AS key, count(1) AS cnt @@ -225,7 +226,35 @@ FROM (SELECT x.key AS key, count(1) AS cnt FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) GROUP BY x.key) tmp; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp; + +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp; +SET hive.merge.nway.joins=true; + set hive.optimize.correlation=true; +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp; + +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp; + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT x.key AS key, count(1) AS cnt @@ -236,6 +265,7 @@ SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT x.key AS key, count(1) AS cnt FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) GROUP BY x.key) tmp; +SET hive.merge.nway.joins=true; set hive.auto.convert.join=false; set hive.optimize.correlation=false; diff --git ql/src/test/queries/clientpositive/correlationoptimizer2.q ql/src/test/queries/clientpositive/correlationoptimizer2.q index eeccd24..7480346 100644 --- ql/src/test/queries/clientpositive/correlationoptimizer2.q +++ ql/src/test/queries/clientpositive/correlationoptimizer2.q @@ -94,6 +94,7 @@ FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 set hive.optimize.correlation=false; -- Full Outer Join should be handled. 
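-- As elsewhere in this patch, the FULL OUTER JOIN below is exercised twice: once with
-- hive.mapjoin.full.outer=false as the reduce-side baseline, then with
-- hive.mapjoin.full.outer=true plus hive.merge.nway.joins=false so the FULL OUTER
-- MapJoin conversion is actually applied, letting both the EXPLAIN plans and the query
-- results be compared.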
+SET hive.mapjoin.full.outer=false; EXPLAIN SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 @@ -107,7 +108,39 @@ FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b ON (a.key = b.key)) tmp; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp; + +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp; +SET hive.merge.nway.joins=true; + set hive.optimize.correlation=true; +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp; + +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp; + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 @@ -120,8 +153,28 @@ FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b ON (a.key = b.key)) tmp; +SET hive.merge.nway.joins=true; set hive.optimize.correlation=false; + +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp; + +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp; + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=true; EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT a.key AS key, count(1) AS cnt @@ -136,11 +189,13 @@ FROM (SELECT a.key AS key, count(1) AS cnt FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b ON 
(a.key = b.key) GROUP BY a.key) tmp; +SET hive.merge.nway.joins=false; set hive.optimize.correlation=true; -- After FULL OUTER JOIN, keys with null values are not grouped, right now, -- we have to generate 2 MR jobs for tmp, 1 MR job for a join b and another for the -- GroupByOperator on key. +SET hive.mapjoin.full.outer=false; EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT a.key AS key, count(1) AS cnt @@ -156,11 +211,30 @@ FROM (SELECT a.key AS key, count(1) AS cnt ON (a.key = b.key) GROUP BY a.key) tmp; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp; + +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp; +SET hive.merge.nway.joins=true; + set hive.optimize.correlation=false; -- When Correlation Optimizer is turned off, we need 4 MR jobs. -- When Correlation Optimizer is turned on, the subquery of tmp will be evaluated in -- a single MR job (including the subquery a, the subquery b, and a join b). So, we -- will have 2 MR jobs. +SET hive.mapjoin.full.outer=false; EXPLAIN SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 @@ -174,7 +248,39 @@ FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b ON (a.key = b.key)) tmp; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp; + +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp; +SET hive.merge.nway.joins=true; + set hive.optimize.correlation=true; +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp; + +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp; + +SET 
hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 @@ -187,3 +293,4 @@ FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b ON (a.key = b.key)) tmp; +SET hive.merge.nway.joins=true; diff --git ql/src/test/queries/clientpositive/correlationoptimizer4.q ql/src/test/queries/clientpositive/correlationoptimizer4.q index 02edeff..69d400c 100644 --- ql/src/test/queries/clientpositive/correlationoptimizer4.q +++ ql/src/test/queries/clientpositive/correlationoptimizer4.q @@ -135,6 +135,19 @@ FROM (SELECT y.key AS key, count(1) AS cnt set hive.optimize.correlation=false; -- This case should not be optimized because after the FULL OUTER JOIN, rows with null keys -- are not grouped. +set hive.auto.convert.join=false; +EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) + GROUP BY y.key) tmp; + +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) + GROUP BY y.key) tmp; + +set hive.auto.convert.join=true; EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt @@ -147,6 +160,7 @@ FROM (SELECT y.key AS key, count(1) AS cnt GROUP BY y.key) tmp; set hive.optimize.correlation=true; +set hive.auto.convert.join=false; EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt @@ -157,3 +171,15 @@ SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) GROUP BY y.key) tmp; + +set hive.auto.convert.join=true; +EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) + GROUP BY y.key) tmp; + +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) + GROUP BY y.key) tmp; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/fullouter_mapjoin_1_optimized.q ql/src/test/queries/clientpositive/fullouter_mapjoin_1_optimized.q new file mode 100644 index 0000000..8862d0e --- /dev/null +++ ql/src/test/queries/clientpositive/fullouter_mapjoin_1_optimized.q @@ -0,0 +1,603 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.vectorized.execution.enabled=false; +set hive.vectorized.execution.mapjoin.native.enabled=false; +set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=false; + +set hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +set hive.stats.fetch.column.stats=false; + +------------------------------------------------------------------------------------------ +-- FULL OUTER Vectorized Native MapJoin variation for OPTIMIZED hash table implementation.
+------------------------------------------------------------------------------------------ + +-- SORT_QUERY_RESULTS + +------------------------------------------------------------------------------------------ +-- SHARED-MEMORY MAPJOIN (NO HYBRID GRACE) +------------------------------------------------------------------------------------------ +set hive.optimize.dynamic.partition.hashjoin=false; +SET hive.auto.convert.join.noconditionaltask.size=1000000000; + +set hive.mapjoin.hybridgrace.hashtable=false; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt; +CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt; + +CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt; +CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt; + +CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt; +CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt; + +CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt; +CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt; + +analyze table fullouter_long_big_1a compute statistics; +analyze table fullouter_long_big_1a compute statistics for columns; +analyze table fullouter_long_big_1a_nonull compute statistics; +analyze table fullouter_long_big_1a_nonull compute statistics for columns; +analyze table fullouter_long_small_1a compute statistics; +analyze table fullouter_long_small_1a compute statistics for columns; +analyze table fullouter_long_small_1a_nonull compute statistics; +analyze table fullouter_long_small_1a_nonull compute statistics for columns; + +-- Do first one with FULL OUTER MapJoin NOT Enabled. +SET hive.mapjoin.full.outer=false; +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SET hive.mapjoin.full.outer=true; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). 
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b; + +CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b; + +analyze table fullouter_long_big_1b compute statistics; +analyze table fullouter_long_big_1b compute statistics for columns; +analyze table fullouter_long_small_1b compute statistics; +analyze table fullouter_long_small_1b compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c; + +CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c; + +analyze table fullouter_long_big_1c compute statistics; +analyze table fullouter_long_big_1c compute statistics for columns; +analyze table fullouter_long_small_1c compute statistics; +analyze table fullouter_long_small_1c compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d; + +CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d; + +analyze table fullouter_long_big_1d compute statistics; +analyze table fullouter_long_big_1d compute statistics for columns; +analyze table fullouter_long_small_1d compute statistics; +analyze table fullouter_long_small_1d compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s 
ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt; +CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt; + +CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt; +CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt; + +CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt; +CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt; + +CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt; +CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt; + +analyze table fullouter_multikey_big_1a compute statistics; +analyze table fullouter_multikey_big_1a compute statistics for columns; +analyze table fullouter_multikey_big_1a_nonull compute statistics; +analyze table fullouter_multikey_big_1a_nonull compute statistics for columns; +analyze table fullouter_multikey_small_1a compute statistics; +analyze table fullouter_multikey_small_1a compute statistics for columns; +analyze table fullouter_multikey_small_1a_nonull compute statistics; +analyze table fullouter_multikey_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt; +CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt; + +CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt; +CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt; + +analyze table fullouter_multikey_big_1b compute statistics; +analyze table fullouter_multikey_big_1b compute statistics for columns; +analyze table fullouter_multikey_small_1b compute statistics; +analyze table fullouter_multikey_small_1b compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt; +CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt; + +CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt; +CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt; + +CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt; +CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt; + +CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt; +CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt; + +analyze table fullouter_string_big_1a compute statistics; +analyze table fullouter_string_big_1a compute statistics for columns;
+analyze table fullouter_string_big_1a_nonull compute statistics; +analyze table fullouter_string_big_1a_nonull compute statistics for columns; +analyze table fullouter_string_small_1a compute statistics; +analyze table fullouter_string_small_1a compute statistics for columns; +analyze table fullouter_string_small_1a_nonull compute statistics; +analyze table fullouter_string_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- SHARED-MEMORY MAPJOIN (HYBRID GRACE) +------------------------------------------------------------------------------------------ + +set hive.mapjoin.hybridgrace.hashtable=true; +set hive.llap.enable.grace.join.in.llap=true; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + + + +------------------------------------------------------------------------------------------ +-- DYNAMIC PARTITION HASH JOIN +------------------------------------------------------------------------------------------ + +set hive.optimize.dynamic.partition.hashjoin=true; + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- NOTE: Use very small sizes here to skip SHARED MEMORY MapJoin and force usage +-- NOTE: of DYNAMIC PARTITION HASH JOIN instead. 
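+-- NOTE: (Assumed mechanism: when the small-table size estimate exceeds
+-- NOTE: hive.auto.convert.join.noconditionaltask.size, broadcast MapJoin is rejected
+-- NOTE: and, with hive.optimize.dynamic.partition.hashjoin=true, both sides are
+-- NOTE: hash-partitioned to reducers instead. As an illustrative check, EXPLAIN on
+-- NOTE: any query below should place the Map Join operator in a Reducer vertex
+-- NOTE: rather than a Map vertex.)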
+set hive.auto.convert.join.noconditionaltask.size=500; +set hive.exec.reducers.bytes.per.reducer=500; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). 
+EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + + diff --git ql/src/test/queries/clientpositive/mapjoin46.q ql/src/test/queries/clientpositive/mapjoin46.q index 348dd67..171a6a9 100644 --- ql/src/test/queries/clientpositive/mapjoin46.q +++ ql/src/test/queries/clientpositive/mapjoin46.q @@ -2,6 +2,8 @@ set hive.auto.convert.join=true; set hive.strict.checks.cartesian.product=false; set hive.join.emit.interval=2; +-- SORT_QUERY_RESULTS + CREATE TABLE test1 (key INT, value INT, col_1 STRING); INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'), (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car'); @@ -172,6 +174,22 @@ ON (test1.value=test2.value OR test2.key between 100 and 102)); -- Disjunction with pred on multiple inputs and single inputs (full outer join) +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 @@ -184,8 +202,23 @@ FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102 OR test2.key between 100 and 102); +SET hive.merge.nway.joins=true; -- Disjunction with pred on multiple inputs and left input (full outer join) +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 @@ -196,8 +229,23 @@ SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102); +SET hive.merge.nway.joins=true; -- Disjunction with pred on multiple inputs and right input (full outer join) +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 @@ -208,8 +256,25 @@ SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test2.key between 100 and 102); +SET 
hive.merge.nway.joins=true; -- Keys plus residual (full outer join) +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 @@ -222,8 +287,51 @@ FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value AND (test1.key between 100 and 102 OR test2.key between 100 and 102)); +SET hive.merge.nway.joins=true; -- Mixed ( FOJ (ROJ, LOJ) ) +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2); + +SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM ( @@ -262,3 +370,4 @@ FULL OUTER JOIN ( OR test2.key between 100 and 102)) ) sq2 ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2); +SET hive.merge.nway.joins=true; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q index ff4cde2..ee9a89c 100644 --- ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q +++ ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q @@ -22,3 +22,9 @@ select a.* from alltypesorc a left outer join src b on a.cint = cast(b.key as int) and (a.cint < 100) limit 1; + +explain +select a.* +from alltypesorc a left outer join src b +on a.cint = cast(b.key as int) +limit 1; diff --git ql/src/test/queries/clientpositive/vector_full_outer_join.q ql/src/test/queries/clientpositive/vector_full_outer_join.q new file mode 100644 index 0000000..cc77488 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_full_outer_join.q @@ -0,0 +1,82 @@ +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; + +-- 
SORT_QUERY_RESULTS + +drop table if exists TJOIN1; +drop table if exists TJOIN2; +create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc; +create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc; +create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE ; +create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE ; +LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE; +LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE; +INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE; +INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE; + +SET hive.mapjoin.full.outer=true; + +set hive.vectorized.execution.enabled=false; +set hive.mapjoin.hybridgrace.hashtable=false; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +set hive.vectorized.execution.enabled=false; +set hive.mapjoin.hybridgrace.hashtable=true; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + + +set hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=false; +SET hive.vectorized.execution.mapjoin.native.enabled=false; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +set hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=true; +SET hive.vectorized.execution.mapjoin.native.enabled=false; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +set hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=false; +SET hive.vectorized.execution.mapjoin.native.enabled=true; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +set hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=true; +SET hive.vectorized.execution.mapjoin.native.enabled=true; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 
as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + + +-- Omit tjoin2.c1 +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +-- Omit tjoin2.c1 and tjoin2.c2 +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); diff --git ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_fast.q ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_fast.q new file mode 100644 index 0000000..1b9448b --- /dev/null +++ ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_fast.q @@ -0,0 +1,454 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.vectorized.execution.enabled=true; +set hive.vectorized.execution.mapjoin.native.enabled=true; +set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=true; + +set hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +set hive.stats.fetch.column.stats=false; + +------------------------------------------------------------------------------------------ +-- FULL OUTER Vectorized Native MapJoin variation for FAST hash table implementation. +------------------------------------------------------------------------------------------ + +-- SORT_QUERY_RESULTS + +------------------------------------------------------------------------------------------ +-- SHARED-MEMORY MAPJOIN (NO HYBRID GRACE) +------------------------------------------------------------------------------------------ +set hive.optimize.dynamic.partition.hashjoin=false; +SET hive.auto.convert.join.noconditionaltask.size=1000000000; + +set hive.mapjoin.hybridgrace.hashtable=false; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt; +CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt; + +CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt; +CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt; + +CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt; +CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt; + +CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt; +CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS 
SELECT * FROM fullouter_long_small_1a_nonull_txt; + +analyze table fullouter_long_big_1a compute statistics; +analyze table fullouter_long_big_1a compute statistics for columns; +analyze table fullouter_long_big_1a_nonull compute statistics; +analyze table fullouter_long_big_1a_nonull compute statistics for columns; +analyze table fullouter_long_small_1a compute statistics; +analyze table fullouter_long_small_1a compute statistics for columns; +analyze table fullouter_long_small_1a_nonull compute statistics; +analyze table fullouter_long_small_1a_nonull compute statistics for columns; + +-- Do first one with FULL OUTER MapJoin NOT Enabled. +SET hive.mapjoin.full.outer=false; +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SET hive.mapjoin.full.outer=true; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b; + +CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b; + +analyze table fullouter_long_big_1b compute statistics; +analyze table fullouter_long_big_1b compute statistics for columns; +analyze table fullouter_long_small_1b compute statistics; +analyze table fullouter_long_small_1b compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c; + +CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c; + +analyze table fullouter_long_big_1c compute statistics; +analyze table fullouter_long_big_1c compute statistics for columns; +analyze table 
fullouter_long_small_1c compute statistics; +analyze table fullouter_long_small_1c compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d; + +CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d; + +analyze table fullouter_long_big_1d compute statistics; +analyze table fullouter_long_big_1d compute statistics for columns; +analyze table fullouter_long_small_1d compute statistics; +analyze table fullouter_long_small_1d compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt; +CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt; + +CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt; +CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt; + +CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt; +CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt; + +CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt; +CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt; + +analyze table fullouter_multikey_big_1a compute statistics; +analyze table fullouter_multikey_big_1a compute statistics for columns; +analyze table fullouter_multikey_big_1a_nonull compute statistics; +analyze table fullouter_multikey_big_1a_nonull compute statistics for columns; +analyze table fullouter_multikey_small_1a compute statistics; +analyze table fullouter_multikey_small_1a compute statistics for columns; +analyze table 
fullouter_multikey_small_1a_nonull compute statistics; +analyze table fullouter_multikey_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt; +CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt; + +CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt; +CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt; + +analyze table fullouter_multikey_big_1b compute statistics; +analyze table fullouter_multikey_big_1b compute statistics for columns; +analyze table fullouter_multikey_small_1b compute statistics; +analyze table fullouter_multikey_small_1b compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt; +CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt; + +CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE
fullouter_string_big_1a_nonull_txt; +CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt; + +CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt; +CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt; + +CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt; +CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt; + +analyze table fullouter_string_big_1a compute statistics; +analyze table fullouter_string_big_1a compute statistics for columns; +analyze table fullouter_string_big_1a_nonull compute statistics; +analyze table fullouter_string_big_1a_nonull compute statistics for columns; +analyze table fullouter_string_small_1a compute statistics; +analyze table fullouter_string_small_1a compute statistics for columns; +analyze table fullouter_string_small_1a_nonull compute statistics; +analyze table fullouter_string_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- SHARED-MEMORY MAPJOIN (HYBRID GRACE) +------------------------------------------------------------------------------------------ + +-- Since FAST Hash Table does not support Hybrid Grace, we omit this section. + + + +------------------------------------------------------------------------------------------ +-- DYNAMIC PARTITION HASH JOIN +------------------------------------------------------------------------------------------ + +set hive.optimize.dynamic.partition.hashjoin=true; + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- NOTE: Use very small sizes here to skip SHARED MEMORY MapJoin and force usage +-- NOTE: of DYNAMIC PARTITION HASH JOIN instead. 
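+-- NOTE: (Additionally, the tiny hive.exec.reducers.bytes.per.reducer below should fan
+-- NOTE: the join out across several reducers, so each reducer holds only a slice of
+-- NOTE: the small table; presumably this exercises emitting unmatched small-table
+-- NOTE: rows independently per reducer partition.)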
+set hive.auto.convert.join.noconditionaltask.size=500; +set hive.exec.reducers.bytes.per.reducer=500; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). 
+EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + + diff --git ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized.q ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized.q new file mode 100644 index 0000000..0f21231 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized.q @@ -0,0 +1,603 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.vectorized.execution.enabled=true; +set hive.vectorized.execution.mapjoin.native.enabled=true; +set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=false; + +set hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +set hive.stats.fetch.column.stats=false; + +------------------------------------------------------------------------------------------ +-- FULL OUTER Vectorized Native MapJoin variation for OPTIMIZED hash table implementation. 
+------------------------------------------------------------------------------------------ + +-- SORT_QUERY_RESULTS + +------------------------------------------------------------------------------------------ +-- SHARED-MEMORY MAPJOIN (NO HYBRID GRACE) +------------------------------------------------------------------------------------------ +set hive.optimize.dynamic.partition.hashjoin=false; +SET hive.auto.convert.join.noconditionaltask.size=1000000000; + +set hive.mapjoin.hybridgrace.hashtable=false; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt; +CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt; + +CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt; +CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt; + +CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt; +CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt; + +CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt; +CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt; + +analyze table fullouter_long_big_1a compute statistics; +analyze table fullouter_long_big_1a compute statistics for columns; +analyze table fullouter_long_big_1a_nonull compute statistics; +analyze table fullouter_long_big_1a_nonull compute statistics for columns; +analyze table fullouter_long_small_1a compute statistics; +analyze table fullouter_long_small_1a compute statistics for columns; +analyze table fullouter_long_small_1a_nonull compute statistics; +analyze table fullouter_long_small_1a_nonull compute statistics for columns; + +-- Do first one with FULL OUTER MapJoin NOT Enabled. +SET hive.mapjoin.full.outer=false; +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SET hive.mapjoin.full.outer=true; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). 
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b; + +CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b; + +analyze table fullouter_long_big_1b compute statistics; +analyze table fullouter_long_big_1b compute statistics for columns; +analyze table fullouter_long_small_1b compute statistics; +analyze table fullouter_long_small_1b compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c; + +CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c; + +analyze table fullouter_long_big_1c compute statistics; +analyze table fullouter_long_big_1c compute statistics for columns; +analyze table fullouter_long_small_1c compute statistics; +analyze table fullouter_long_small_1c compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d; + +CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d; + +analyze table fullouter_long_big_1d compute statistics; +analyze table fullouter_long_big_1d compute statistics for columns; +analyze table fullouter_long_small_1d compute statistics; +analyze table fullouter_long_small_1d compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s 
ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt; +CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt; + +CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt; +CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt; + +CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt; +CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt; + +CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt; +CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt; + +analyze table fullouter_multikey_big_1a compute statistics; +analyze table fullouter_multikey_big_1a compute statistics for columns; +analyze table fullouter_multikey_big_1a_nonull compute statistics; +analyze table fullouter_multikey_big_1a_nonull compute statistics for columns; +analyze table fullouter_multikey_small_1a compute statistics; +analyze table fullouter_multikey_small_1a compute statistics for columns; +analyze table fullouter_multikey_small_1a_nonull compute statistics; +analyze table fullouter_multikey_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1;
+
+
+
+
+CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt;
+CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt;
+
+CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18))
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt;
+CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt;
+
+analyze table fullouter_multikey_big_1b compute statistics;
+analyze table fullouter_multikey_big_1b compute statistics for columns;
+analyze table fullouter_multikey_small_1b compute statistics;
+analyze table fullouter_multikey_small_1b compute statistics for columns;
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2
+order by b.key0, b.key1;
+
+SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2
+order by b.key0, b.key1;
+
+
+------------------------------------------------------------------------------------------
+-- Single STRING key
+------------------------------------------------------------------------------------------
+
+CREATE TABLE fullouter_string_big_1a_txt(key string)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt;
+CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt;
+
+CREATE TABLE fullouter_string_big_1a_nonull_txt(key string)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt;
+CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt;
+
+CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt;
+CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt;
+
+CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt;
+CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt;
+
+analyze table fullouter_string_big_1a compute statistics;
+analyze table fullouter_string_big_1a compute statistics for columns;
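+
+-- (Illustrative only, kept commented out so the golden output is unchanged: the basic
+-- stats gathered by these ANALYZE statements, such as numRows and totalSize, are what
+-- the optimizer compares against hive.auto.convert.join.noconditionaltask.size when it
+-- chooses the hash-table side of the MapJoin. They can be inspected with:)
+-- DESCRIBE FORMATTED fullouter_string_big_1a;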
+analyze table fullouter_string_big_1a_nonull compute statistics; +analyze table fullouter_string_big_1a_nonull compute statistics for columns; +analyze table fullouter_string_small_1a compute statistics; +analyze table fullouter_string_small_1a compute statistics for columns; +analyze table fullouter_string_small_1a_nonull compute statistics; +analyze table fullouter_string_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- SHARED-MEMORY MAPJOIN (HYBRID GRACE) +------------------------------------------------------------------------------------------ + +set hive.mapjoin.hybridgrace.hashtable=true; +set hive.llap.enable.grace.join.in.llap=true; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + + + +------------------------------------------------------------------------------------------ +-- DYNAMIC PARTITION HASH JOIN +------------------------------------------------------------------------------------------ + +set hive.optimize.dynamic.partition.hashjoin=true; + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- NOTE: Use very small sizes here to skip SHARED MEMORY MapJoin and force usage +-- NOTE: of DYNAMIC PARTITION HASH JOIN instead. 
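+-- NOTE: (A sketch of the expected mechanism, not something this test asserts: at a
+-- NOTE: 500-byte threshold no small table qualifies for a broadcast hash table, so
+-- NOTE: hive.optimize.dynamic.partition.hashjoin lets Tez shuffle both sides on the
+-- NOTE: join key and build the hash table from the shuffled small-table rows in the
+-- NOTE: reducer; the MapJoins below should then appear under Reducer vertices in the
+-- NOTE: EXPLAIN output.)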
+set hive.auto.convert.join.noconditionaltask.size=500; +set hive.exec.reducers.bytes.per.reducer=500; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). 
+EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1;
+
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1;
+
+
+
+
+EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2
+order by b.key0, b.key1;
+
+SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2
+order by b.key0, b.key1;
+
+
+
diff --git ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized_passthru.q ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized_passthru.q
new file mode 100644
index 0000000..3828f48
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized_passthru.q
@@ -0,0 +1,603 @@
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.vectorized.execution.enabled=true;
+set hive.vectorized.execution.mapjoin.native.enabled=false;
+set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=false;
+
+set hive.auto.convert.join=true;
+SET hive.auto.convert.join.noconditionaltask=true;
+set hive.stats.fetch.column.stats=false;
+
+------------------------------------------------------------------------------------------
+-- FULL OUTER Vectorized PASS-THRU Mode MapJoin variation for OPTIMIZED hash table implementation.
+------------------------------------------------------------------------------------------ + +-- SORT_QUERY_RESULTS + +------------------------------------------------------------------------------------------ +-- SHARED-MEMORY MAPJOIN (NO HYBRID GRACE) +------------------------------------------------------------------------------------------ +set hive.optimize.dynamic.partition.hashjoin=false; +SET hive.auto.convert.join.noconditionaltask.size=1000000000; + +set hive.mapjoin.hybridgrace.hashtable=false; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt; +CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt; + +CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt; +CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt; + +CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt; +CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt; + +CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt; +CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt; + +analyze table fullouter_long_big_1a compute statistics; +analyze table fullouter_long_big_1a compute statistics for columns; +analyze table fullouter_long_big_1a_nonull compute statistics; +analyze table fullouter_long_big_1a_nonull compute statistics for columns; +analyze table fullouter_long_small_1a compute statistics; +analyze table fullouter_long_small_1a compute statistics for columns; +analyze table fullouter_long_small_1a_nonull compute statistics; +analyze table fullouter_long_small_1a_nonull compute statistics for columns; + +-- Do first one with FULL OUTER MapJoin NOT Enabled. +SET hive.mapjoin.full.outer=false; +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SET hive.mapjoin.full.outer=true; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). 
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b; + +CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b; + +analyze table fullouter_long_big_1b compute statistics; +analyze table fullouter_long_big_1b compute statistics for columns; +analyze table fullouter_long_small_1b compute statistics; +analyze table fullouter_long_small_1b compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c; + +CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c; + +analyze table fullouter_long_big_1c compute statistics; +analyze table fullouter_long_big_1c compute statistics for columns; +analyze table fullouter_long_small_1c compute statistics; +analyze table fullouter_long_small_1c compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d; + +CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d; + +analyze table fullouter_long_big_1d compute statistics; +analyze table fullouter_long_big_1d compute statistics for columns; +analyze table fullouter_long_small_1d compute statistics; +analyze table fullouter_long_small_1d compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s 
ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt; +CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt; + +CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt; +CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt; + +CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt; +CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt; + +CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt; +CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt; + +analyze table fullouter_multikey_big_1a compute statistics; +analyze table fullouter_multikey_big_1a compute statistics for columns; +analyze table fullouter_multikey_big_1a_nonull compute statistics; +analyze table fullouter_multikey_big_1a_nonull compute statistics for columns; +analyze table fullouter_multikey_small_1a compute statistics; +analyze table fullouter_multikey_small_1a compute statistics for columns; +analyze table fullouter_multikey_small_1a_nonull compute statistics; +analyze table fullouter_multikey_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1;
+
+
+
+
+CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt;
+CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt;
+
+CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18))
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt;
+CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt;
+
+analyze table fullouter_multikey_big_1b compute statistics;
+analyze table fullouter_multikey_big_1b compute statistics for columns;
+analyze table fullouter_multikey_small_1b compute statistics;
+analyze table fullouter_multikey_small_1b compute statistics for columns;
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2
+order by b.key0, b.key1;
+
+SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2
+order by b.key0, b.key1;
+
+
+------------------------------------------------------------------------------------------
+-- Single STRING key
+------------------------------------------------------------------------------------------
+
+CREATE TABLE fullouter_string_big_1a_txt(key string)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt;
+CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt;
+
+CREATE TABLE fullouter_string_big_1a_nonull_txt(key string)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt;
+CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt;
+
+CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt;
+CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt;
+
+CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt;
+CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt;
+
+analyze table fullouter_string_big_1a compute statistics;
+analyze table fullouter_string_big_1a compute statistics for columns;
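+
+-- (Descriptive note: with hive.vectorized.execution.mapjoin.native.enabled=false, the
+-- vectorized pipeline presumably falls back to the pass-thru VectorMapJoinOperator,
+-- which feeds vectorized rows through the row-mode MapJoin logic; the table setup and
+-- ANALYZE statements here deliberately mirror the native variation so both .q files
+-- run against identical statistics.)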
+analyze table fullouter_string_big_1a_nonull compute statistics; +analyze table fullouter_string_big_1a_nonull compute statistics for columns; +analyze table fullouter_string_small_1a compute statistics; +analyze table fullouter_string_small_1a compute statistics for columns; +analyze table fullouter_string_small_1a_nonull compute statistics; +analyze table fullouter_string_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- SHARED-MEMORY MAPJOIN (HYBRID GRACE) +------------------------------------------------------------------------------------------ + +set hive.mapjoin.hybridgrace.hashtable=true; +set hive.llap.enable.grace.join.in.llap=true; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + + + +------------------------------------------------------------------------------------------ +-- DYNAMIC PARTITION HASH JOIN +------------------------------------------------------------------------------------------ + +set hive.optimize.dynamic.partition.hashjoin=true; + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- NOTE: Use very small sizes here to skip SHARED MEMORY MapJoin and force usage +-- NOTE: of DYNAMIC PARTITION HASH JOIN instead. 
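+-- NOTE: (Additional context, a sketch rather than a guarantee:
+-- NOTE: hive.exec.reducers.bytes.per.reducer=500 also forces several reducers, so the
+-- NOTE: reduce-side hash join is exercised across multiple partitioned hash tables
+-- NOTE: rather than a single one, matching the native variation of this test.)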
+set hive.auto.convert.join.noconditionaltask.size=500; +set hive.exec.reducers.bytes.per.reducer=500; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). 
+EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + + diff --git ql/src/test/queries/clientpositive/vector_join30.q ql/src/test/queries/clientpositive/vector_join30.q index 6557a71..9108c89 100644 --- ql/src/test/queries/clientpositive/vector_join30.q +++ ql/src/test/queries/clientpositive/vector_join30.q @@ -10,7 +10,7 @@ SET hive.auto.convert.join.noconditionaltask.size=1000000000; CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src; -explain vectorization expression +explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -18,14 +18,14 @@ JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); -FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -select sum(hash(Y.key,Y.value)); +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- select sum(hash(Y.key,Y.value)); -explain vectorization expression +explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -33,116 +33,238 @@ LEFT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- LEFT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -LEFT OUTER JOIN +RIGHT OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- RIGHT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- FULL OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -RIGHT OUTER JOIN +JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) 
Y +-- ON (x.key = Y.key) +-- JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) -JOIN +LEFT OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -FROM +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- LEFT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail debug +FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -JOIN +LEFT OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) -JOIN +LEFT OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- LEFT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- LEFT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -JOIN +LEFT OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) -LEFT OUTER JOIN +RIGHT OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -FROM +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- LEFT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- RIGHT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail debug +FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -JOIN +RIGHT OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) -LEFT OUTER JOIN +RIGHT OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- RIGHT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- RIGHT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +----------------- + +explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -LEFT OUTER JOIN +JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) -LEFT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -FROM +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- FULL OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail debug +FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -LEFT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) -LEFT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- FULL 
OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- FULL OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -LEFT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) -RIGHT OUTER JOIN +LEFT OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -FROM +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- FULL OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- LEFT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail debug +FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- LEFT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- FULL OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) RIGHT OUTER JOIN @@ -150,12 +272,33 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -FROM +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- FULL OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- RIGHT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail debug +FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); + +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- RIGHT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- FULL OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); diff --git ql/src/test/queries/clientpositive/vector_join_filters.q ql/src/test/queries/clientpositive/vector_join_filters.q index aac10c1..329eb25 100644 --- ql/src/test/queries/clientpositive/vector_join_filters.q +++ ql/src/test/queries/clientpositive/vector_join_filters.q @@ -14,7 +14,10 @@ CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; + +EXPLAIN VECTORIZATION OPERATOR SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND 
b.key = b.value; +-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; @@ -26,10 +29,21 @@ SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN my SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +EXPLAIN VECTORIZATION OPERATOR SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; + +EXPLAIN VECTORIZATION OPERATOR SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; + +EXPLAIN VECTORIZATION OPERATOR SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; + +EXPLAIN VECTORIZATION OPERATOR SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value); SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND 
a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value); diff --git ql/src/test/queries/clientpositive/vector_join_nulls.q ql/src/test/queries/clientpositive/vector_join_nulls.q index b978b41..de41592 100644 --- ql/src/test/queries/clientpositive/vector_join_nulls.q +++ ql/src/test/queries/clientpositive/vector_join_nulls.q @@ -14,7 +14,11 @@ CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b; + +EXPLAIN VECTORIZATION OPERATOR SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b; +-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b; + SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value; @@ -23,9 +27,21 @@ SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN my SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value; + +EXPLAIN VECTORIZATION OPERATOR SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value; +-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value; + +EXPLAIN VECTORIZATION OPERATOR SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key; +-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key; + +EXPLAIN VECTORIZATION OPERATOR SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value; +-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value; + +EXPLAIN VECTORIZATION OPERATOR +-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value); diff --git ql/src/test/queries/clientpositive/vector_left_outer_join2.q ql/src/test/queries/clientpositive/vector_left_outer_join2.q index ccceb36..84f656b 100644 --- ql/src/test/queries/clientpositive/vector_left_outer_join2.q +++ ql/src/test/queries/clientpositive/vector_left_outer_join2.q @@ -1,3 +1,4 @@ +set hive.cli.print.header=true; set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.fetch.task.conversion=none; @@ -20,14 +21,14 @@ INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE; set hive.vectorized.execution.enabled=false; set hive.mapjoin.hybridgrace.hashtable=false; 
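+-- The blocks below re-run the same LEFT OUTER JOIN under each combination of
+-- hive.vectorized.execution.enabled, hive.mapjoin.hybridgrace.hashtable and
+-- hive.vectorized.execution.mapjoin.native.enabled: "explain vectorization detail"
+-- captures the plan, then the query itself captures the rows. A minimal sketch of
+-- the same pattern applied to the FULL OUTER case (illustrative only; it reuses
+-- the tjoin1/tjoin2 tables above and the hive.mapjoin.full.outer property
+-- toggled elsewhere in this patch):
+--
+--   SET hive.mapjoin.full.outer=true;
+--   explain vectorization detail
+--   select tjoin1.rnum, tjoin1.c1, tjoin2.c2 from tjoin1 full outer join tjoin2 on (tjoin1.c1 = tjoin2.c1);
+--   select tjoin1.rnum, tjoin1.c1, tjoin2.c2 from tjoin1 full outer join tjoin2 on (tjoin1.c1 = tjoin2.c1);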
-explain vectorization expression +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); set hive.vectorized.execution.enabled=false; set hive.mapjoin.hybridgrace.hashtable=true; -explain vectorization expression +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); @@ -36,7 +37,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=false; SET hive.vectorized.execution.mapjoin.native.enabled=false; -explain vectorization expression +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); @@ -44,7 +45,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=true; SET hive.vectorized.execution.mapjoin.native.enabled=false; -explain vectorization expression +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); @@ -52,7 +53,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=false; SET hive.vectorized.execution.mapjoin.native.enabled=true; -explain vectorization expression +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); @@ -60,7 +61,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=true; SET hive.vectorized.execution.mapjoin.native.enabled=true; -explain vectorization expression +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q index 8469a06..490cabd 100644 --- ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q +++ ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q @@ -3,6 +3,7 @@ set 
hive.fetch.task.conversion=none; set hive.auto.convert.join=true; set hive.auto.convert.join.noconditionaltask=true; set hive.auto.convert.join.noconditionaltask.size=10000; +SET hive.merge.nway.joins=false; -- SORT_QUERY_RESULTS @@ -26,167 +27,190 @@ select * from t4; set hive.vectorized.execution.enabled=false; set hive.mapjoin.hybridgrace.hashtable=false; -explain vectorization only summary - +explain vectorization expression select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; -explain vectorization only summary +explain vectorization expression select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; -explain vectorization only summary +explain vectorization expression select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; -explain vectorization only summary +explain vectorization expression select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; -explain vectorization only summary +explain vectorization expression select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; -explain vectorization only summary +explain vectorization expression select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; -explain vectorization only summary +explain vectorization expression select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; -explain vectorization only summary +explain vectorization expression select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; -explain vectorization only summary +explain vectorization expression select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; -explain vectorization only summary +explain vectorization expression select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; -explain vectorization only summary +explain vectorization expression select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; -explain vectorization only summary +explain vectorization expression select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; select * from t3 a left semi join t1 b on a.key = b.key and 
a.value=b.value sort by a.key, a.value; -explain vectorization only summary +explain vectorization expression select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; -explain vectorization only summary +explain vectorization expression select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain vectorization only summary +SET hive.mapjoin.full.outer=false; +explain vectorization expression select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain vectorization only summary +SET hive.mapjoin.full.outer=true; +explain vectorization expression +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; + +-- Verify this works (FULL OUTER MapJoin is not enabled for N-way) +SET hive.merge.nway.joins=true; +explain vectorization expression +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +SET hive.merge.nway.joins=false; + +explain vectorization expression select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; -explain vectorization only summary -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; +SET hive.mapjoin.full.outer=false; +explain vectorization expression +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; -explain vectorization only summary +SET hive.mapjoin.full.outer=true; +explain vectorization expression select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; -explain vectorization only summary +explain vectorization expression select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; -explain vectorization only summary +explain vectorization expression select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; set hive.vectorized.execution.enabled=false; set hive.mapjoin.hybridgrace.hashtable=true; +set 
hive.llap.enable.grace.join.in.llap=true; -explain vectorization summary +explain vectorization operator select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; -explain vectorization summary +explain vectorization operator select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; -explain vectorization summary +explain vectorization operator select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; -explain vectorization summary +explain vectorization operator select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; -explain vectorization summary +explain vectorization operator select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; -explain vectorization summary +explain vectorization operator select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; -explain vectorization summary +explain vectorization operator select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; -explain vectorization summary +explain vectorization operator select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; -explain vectorization summary +explain vectorization operator select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; -explain vectorization summary +explain vectorization operator select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; -explain vectorization summary +explain vectorization operator select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; -explain vectorization summary +explain vectorization operator select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; -explain vectorization summary +explain vectorization operator select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; -explain vectorization summary +explain 
vectorization operator select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain vectorization summary +SET hive.mapjoin.full.outer=false; +explain vectorization operator select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain vectorization summary +SET hive.mapjoin.full.outer=true; +explain vectorization operator +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; + +explain vectorization operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; -explain vectorization summary -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; - -explain vectorization summary +SET hive.mapjoin.full.outer=false; +explain vectorization operator select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; -explain vectorization summary +SET hive.mapjoin.full.outer=true; +explain vectorization operator +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +-- select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; + +explain vectorization operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; -explain vectorization summary +explain vectorization operator select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; @@ -250,21 +274,29 @@ explain vectorization only operator select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain vectorization only operator +SET hive.mapjoin.full.outer=false; +explain vectorization operator select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +SET hive.mapjoin.full.outer=true; +explain vectorization operator +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +-- select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; + explain vectorization only operator select a.key from t3 a left 
semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; +SET hive.mapjoin.full.outer=false; explain vectorization only operator -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +SET hive.mapjoin.full.outer=true; explain vectorization only operator select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; -select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +-- select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; explain vectorization only operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; @@ -276,85 +308,94 @@ select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 10 set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=true; +set hive.llap.enable.grace.join.in.llap=true; SET hive.vectorized.execution.mapjoin.native.enabled=false; -explain vectorization detail +explain vectorization operator select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; -explain vectorization detail +explain vectorization operator select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; -explain vectorization detail +explain vectorization operator select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; -explain vectorization detail +explain vectorization operator select * from t2 a left semi join (select key , value from t1 where key > 2) b on 
a.key = b.key sort by a.key, a.value; select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; -explain vectorization detail +explain vectorization operator select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; -explain vectorization detail +explain vectorization operator select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain vectorization detail +SET hive.mapjoin.full.outer=false; +explain vectorization operator select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain vectorization detail +SET hive.mapjoin.full.outer=true; +explain vectorization operator +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; + +explain vectorization operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; -explain vectorization detail -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; +SET hive.mapjoin.full.outer=false; +explain vectorization operator +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; -explain vectorization detail +SET hive.mapjoin.full.outer=true; +explain vectorization operator select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = 
c.key sort by a.key; -explain vectorization detail +explain vectorization operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; -explain vectorization detail +explain vectorization operator select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; @@ -362,166 +403,183 @@ set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=false; SET hive.vectorized.execution.mapjoin.native.enabled=true; -explain vectorization detail +explain vectorization operator select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; -explain vectorization detail +explain vectorization operator select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; -explain vectorization detail +explain vectorization operator select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; -explain vectorization detail +explain vectorization operator select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; -explain vectorization detail +explain vectorization operator select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = 
c.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; -explain vectorization detail +explain vectorization operator select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain vectorization detail +SET hive.mapjoin.full.outer=false; +explain vectorization operator +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; + +SET hive.mapjoin.full.outer=true; +explain vectorization operator select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain vectorization detail +explain vectorization operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; -explain vectorization detail -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; +SET hive.mapjoin.full.outer=false; +explain vectorization operator +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; -explain vectorization detail +SET hive.mapjoin.full.outer=true; +explain vectorization operator select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; -explain vectorization detail +explain vectorization operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; -explain vectorization detail +explain vectorization operator select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=true; +set hive.llap.enable.grace.join.in.llap=true; SET hive.vectorized.execution.mapjoin.native.enabled=true; -explain vectorization detail +explain vectorization operator select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; 
select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; -explain vectorization detail +explain vectorization operator select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; -explain vectorization detail +explain vectorization operator select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; -explain vectorization detail +explain vectorization operator select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; -explain vectorization detail +explain vectorization operator select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; -explain vectorization detail +explain vectorization operator select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; -explain vectorization detail +explain vectorization operator select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t3 a left outer join t1 b on a.key = b.key 
left semi join t2 c on b.key = c.key sort by a.key; -explain vectorization detail +SET hive.mapjoin.full.outer=false; +explain vectorization operator select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain vectorization detail +SET hive.mapjoin.full.outer=true; +explain vectorization operator +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; + +explain vectorization operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; -explain vectorization detail -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; +SET hive.mapjoin.full.outer=false; +explain vectorization operator +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; -explain vectorization detail +SET hive.mapjoin.full.outer=true; +explain vectorization operator select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; -explain vectorization detail +explain vectorization operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; -explain vectorization detail +explain vectorization operator select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; diff --git ql/src/test/queries/clientpositive/vector_nullsafe_join.q ql/src/test/queries/clientpositive/vector_nullsafe_join.q index 6a7ff72..2d7155e 100644 --- ql/src/test/queries/clientpositive/vector_nullsafe_join.q +++ ql/src/test/queries/clientpositive/vector_nullsafe_join.q @@ -38,7 +38,11 @@ select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key -- outer joins SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key<=>b.value; + +EXPLAIN VECTORIZATION DETAIL DEBUG SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value; +-- SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value; + SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key<=>b.value; -- map joins @@ -65,7 +69,11 @@ select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key -- outer joins SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key<=>b.value; + +EXPLAIN VECTORIZATION DETAIL DEBUG SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value; +-- SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value; + SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key<=>b.value; 
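+-- Unlike "=", the null-safe operator "<=>" treats two NULLs as equal, so the
+-- FULL OUTER JOIN above also pairs rows whose join keys are both NULL instead
+-- of emitting two unmatched rows. A minimal sketch of the difference
+-- (illustrative only, reusing the myinput1 table from this test):
+--
+--   SELECT count(*) FROM myinput1 a JOIN myinput1 b ON a.key <=> b.value;  -- NULL keys match
+--   SELECT count(*) FROM myinput1 a JOIN myinput1 b ON a.key = b.value;    -- NULL keys never match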
-- map joins diff --git ql/src/test/queries/clientpositive/vectorized_join46.q ql/src/test/queries/clientpositive/vectorized_join46.q index af155cc..7be2b0e 100644 --- ql/src/test/queries/clientpositive/vectorized_join46.q +++ ql/src/test/queries/clientpositive/vectorized_join46.q @@ -1,3 +1,4 @@ +set hive.cli.print.header=true; set hive.vectorized.execution.enabled=true; set hive.auto.convert.join=true; set hive.strict.checks.cartesian.product=false; @@ -15,7 +16,7 @@ INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'), -- Basic outer join -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value); @@ -25,7 +26,7 @@ FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value); -- Conjunction with pred on multiple inputs and single inputs (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value @@ -39,7 +40,7 @@ ON (test1.value=test2.value AND test2.key between 100 and 102); -- Conjunction with pred on single inputs (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.key between 100 and 102 @@ -51,7 +52,7 @@ ON (test1.key between 100 and 102 AND test2.key between 100 and 102); -- Conjunction with pred on multiple inputs and none (right outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value AND true); @@ -61,7 +62,7 @@ FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value AND true); -- Condition on one input (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.key between 100 and 102); @@ -71,7 +72,7 @@ FROM test1 LEFT OUTER JOIN test2 ON (test1.key between 100 and 102); -- Disjunction with pred on multiple inputs and single inputs (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value @@ -85,7 +86,7 @@ ON (test1.value=test2.value OR test2.key between 100 and 102); -- Disjunction with pred on multiple inputs and left input (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value @@ -97,7 +98,7 @@ ON (test1.value=test2.value OR test1.key between 100 and 102); -- Disjunction with pred on multiple inputs and right input (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value @@ -109,7 +110,7 @@ ON (test1.value=test2.value OR test2.key between 100 and 102); -- Keys plus residual (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value @@ -123,7 +124,7 @@ ON (test1.value=test2.value OR test2.key between 100 and 102)); -- Disjunction with pred on multiple inputs and single inputs (right outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value @@ -137,7 +138,7 @@ ON (test1.value=test2.value OR test2.key between 100 and 102); -- Disjunction with pred on multiple inputs and left input (right outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value @@ -149,7 +150,7 @@ ON (test1.value=test2.value OR test1.key between 100 and 102); -- Disjunction with pred on multiple inputs and right input (right outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN
test2 ON (test1.value=test2.value @@ -161,7 +162,7 @@ ON (test1.value=test2.value OR test2.key between 100 and 102); -- Keys plus residual (right outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value @@ -175,7 +176,7 @@ ON (test1.value=test2.value OR test2.key between 100 and 102)); -- Disjunction with pred on multiple inputs and single inputs (full outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value @@ -189,7 +190,7 @@ ON (test1.value=test2.value OR test2.key between 100 and 102); -- Disjunction with pred on multiple inputs and left input (full outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value @@ -201,7 +202,7 @@ ON (test1.value=test2.value OR test1.key between 100 and 102); -- Disjunction with pred on multiple inputs and right input (full outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value @@ -213,7 +214,7 @@ ON (test1.value=test2.value OR test2.key between 100 and 102); -- Keys plus residual (full outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value diff --git ql/src/test/queries/clientpositive/vectorized_join46_mr.q ql/src/test/queries/clientpositive/vectorized_join46_mr.q new file mode 100644 index 0000000..7be2b0e --- /dev/null +++ ql/src/test/queries/clientpositive/vectorized_join46_mr.q @@ -0,0 +1,228 @@ +set hive.cli.print.header=true; +set hive.vectorized.execution.enabled=true; +set hive.auto.convert.join=true; +set hive.strict.checks.cartesian.product=false; +set hive.join.emit.interval=2; + +-- SORT_QUERY_RESULTS + +CREATE TABLE test1 (key INT, value INT, col_1 STRING); +INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'), + (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car'); + +CREATE TABLE test2 (key INT, value INT, col_2 STRING); +INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'), + (104, 3, 'Fli'), (105, NULL, 'None'); + + +-- Basic outer join +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value); + +-- Conjunction with pred on multiple inputs and single inputs (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND test1.key between 100 and 102 + AND test2.key between 100 and 102); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND test1.key between 100 and 102 + AND test2.key between 100 and 102); + +-- Conjunction with pred on single inputs (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102 + AND test2.key between 100 and 102); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102 + AND test2.key between 100 and 102); + +-- Conjunction with pred on multiple inputs and none (right outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value AND true); + +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value AND true); + +-- Condition on one input (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102); + +SELECT * +FROM test1
LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and single inputs (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and left input (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and right input (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +-- Keys plus residual (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +-- Disjunction with pred on multiple inputs and single inputs (right outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and left input (right outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and right input (right outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +-- Keys plus residual (right outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +-- Disjunction with pred on multiple inputs and single inputs (full outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and left input (full outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR 
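The new vectorized_join46_mr.q replays the join46 predicate matrix with vectorization and automatic map-join conversion switched on. The recurring pattern is worth calling out: pin a tiny hive.join.emit.interval to exercise the buffering paths, then pair every EXPLAIN VECTORIZATION DETAIL with the query itself so the golden file captures both the plan and the sorted results. A condensed sketch of one cell of that matrix, under the same session settings as the file:

set hive.vectorized.execution.enabled=true;
set hive.auto.convert.join=true;
set hive.join.emit.interval=2;   -- small on purpose: forces early emit/buffer handling

-- SORT_QUERY_RESULTS

EXPLAIN VECTORIZATION DETAIL
SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value = test2.value);

SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value = test2.value);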
test1.key between 100 and 102); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and right input (full outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +-- Keys plus residual (full outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); diff --git ql/src/test/results/clientpositive/annotate_stats_join.q.out ql/src/test/results/clientpositive/annotate_stats_join.q.out index 9173e7a..5d09142 100644 --- ql/src/test/results/clientpositive/annotate_stats_join.q.out +++ ql/src/test/results/clientpositive/annotate_stats_join.q.out @@ -874,7 +874,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) diff --git ql/src/test/results/clientpositive/auto_join18.q.out ql/src/test/results/clientpositive/auto_join18.q.out index 0039983..94c2661 100644 --- ql/src/test/results/clientpositive/auto_join18.q.out +++ ql/src/test/results/clientpositive/auto_join18.q.out @@ -88,7 +88,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out index 4025f06..c233273 100644 --- ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out +++ ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out @@ -90,7 +90,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/auto_join6.q.out ql/src/test/results/clientpositive/auto_join6.q.out index 166ecda..e9a3a5f 100644 --- ql/src/test/results/clientpositive/auto_join6.q.out +++ ql/src/test/results/clientpositive/auto_join6.q.out @@ -81,7 +81,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/auto_join7.q.out ql/src/test/results/clientpositive/auto_join7.q.out index c8f7144..5853d3c 100644 --- ql/src/test/results/clientpositive/auto_join7.q.out +++ ql/src/test/results/clientpositive/auto_join7.q.out @@ -107,7 +107,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 0 to 2 keys: 0 _col0 (type: string) diff --git ql/src/test/results/clientpositive/beeline/smb_mapjoin_1.q.out ql/src/test/results/clientpositive/beeline/smb_mapjoin_1.q.out index e455524..f03c381 100644 --- ql/src/test/results/clientpositive/beeline/smb_mapjoin_1.q.out +++ ql/src/test/results/clientpositive/beeline/smb_mapjoin_1.q.out @@ -272,7 +272,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2060 Basic 
stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) @@ -493,7 +493,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/beeline/smb_mapjoin_2.q.out ql/src/test/results/clientpositive/beeline/smb_mapjoin_2.q.out index 1dfacda..5d4468b 100644 --- ql/src/test/results/clientpositive/beeline/smb_mapjoin_2.q.out +++ ql/src/test/results/clientpositive/beeline/smb_mapjoin_2.q.out @@ -231,7 +231,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) @@ -456,7 +456,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/beeline/smb_mapjoin_3.q.out ql/src/test/results/clientpositive/beeline/smb_mapjoin_3.q.out index cf4c744..1a0a0cf 100644 --- ql/src/test/results/clientpositive/beeline/smb_mapjoin_3.q.out +++ ql/src/test/results/clientpositive/beeline/smb_mapjoin_3.q.out @@ -230,7 +230,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) @@ -453,7 +453,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out index 4b1313d..16137dd 100644 --- ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out +++ ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out @@ -633,7 +633,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/cbo_rp_join1.q.out ql/src/test/results/clientpositive/cbo_rp_join1.q.out index 03ca51f..1e25a6f 100644 --- ql/src/test/results/clientpositive/cbo_rp_join1.q.out +++ ql/src/test/results/clientpositive/cbo_rp_join1.q.out @@ -53,7 +53,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 filter predicates: 0 {(VALUE._col0 = 40)} 1 {(VALUE._col0 = 40)} @@ -155,7 +155,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 filter predicates: 0 {(VALUE._col0 = 40)} {(VALUE._col1 = 40)} 1 {(VALUE._col0 = 40)} @@ -257,7 +257,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 filter predicates: 0 {(VALUE._col0 = 40)} 1 {(VALUE._col0 = 40)} @@ -359,7 +359,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: 
- Outer Join 0 to 1 + Full Outer Join 0 to 1 filter predicates: 0 {(VALUE._col0 > 40)} {(VALUE._col1 > 50)} {(VALUE._col0 = VALUE._col1)} 1 {(VALUE._col0 > 40)} {(VALUE._col1 > 50)} {(VALUE._col0 = VALUE._col1)} diff --git ql/src/test/results/clientpositive/correlationoptimizer8.q.out ql/src/test/results/clientpositive/correlationoptimizer8.q.out index 34a9216..1a58dca 100644 --- ql/src/test/results/clientpositive/correlationoptimizer8.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer8.q.out @@ -1081,7 +1081,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 UDFToDouble(_col0) (type: double) 1 UDFToDouble(_col0) (type: double) diff --git ql/src/test/results/clientpositive/fullouter_mapjoin_1.q.out ql/src/test/results/clientpositive/fullouter_mapjoin_1.q.out new file mode 100644 index 0000000..0ae9df9 --- /dev/null +++ ql/src/test/results/clientpositive/fullouter_mapjoin_1.q.out @@ -0,0 +1,176 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a +PREHOOK: query: CREATE TABLE fullouter_long_small_1a(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: b + 
Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: s + Statistics: Num rows: 1 Data size: 1640 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1640 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 1640 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 
214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 diff --git ql/src/test/results/clientpositive/infer_join_preds.q.out ql/src/test/results/clientpositive/infer_join_preds.q.out index 6a4fa34..f28ca6c 100644 --- ql/src/test/results/clientpositive/infer_join_preds.q.out +++ ql/src/test/results/clientpositive/infer_join_preds.q.out @@ -458,7 +458,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/join18.q.out ql/src/test/results/clientpositive/join18.q.out index 3d5a90f..f64df58 100644 --- ql/src/test/results/clientpositive/join18.q.out +++ ql/src/test/results/clientpositive/join18.q.out @@ -87,7 +87,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/join18_multi_distinct.q.out ql/src/test/results/clientpositive/join18_multi_distinct.q.out index b064af2..d9fa1ec 100644 --- ql/src/test/results/clientpositive/join18_multi_distinct.q.out +++ ql/src/test/results/clientpositive/join18_multi_distinct.q.out @@ -89,7 +89,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/join45.q.out ql/src/test/results/clientpositive/join45.q.out index f2cca31..91f7259 100644 --- ql/src/test/results/clientpositive/join45.q.out +++ ql/src/test/results/clientpositive/join45.q.out @@ -1359,7 +1359,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1464,7 +1464,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/join46.q.out ql/src/test/results/clientpositive/join46.q.out index 0847ca6..c676c58 100644 --- ql/src/test/results/clientpositive/join46.q.out +++ ql/src/test/results/clientpositive/join46.q.out @@ -1421,7 +1421,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1520,7 +1520,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1617,7 +1617,7 @@ STAGE PLANS: Reduce 
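The result block above shows all three row classes a FULL OUTER JOIN must emit: big-side rows with no small-side partner, small-side rows null-extended on the big side, and rows whose key is NULL, which never satisfy the equi-condition and therefore surface exactly once, unmatched. A self-contained illustration of the same semantics (the fo_demo_* tables are hypothetical, not part of this suite):

CREATE TABLE fo_demo_a (k BIGINT);
CREATE TABLE fo_demo_b (k BIGINT);
INSERT INTO fo_demo_a VALUES (1), (2), (NULL);
INSERT INTO fo_demo_b VALUES (2), (3), (NULL);

SELECT a.k, b.k
FROM fo_demo_a a FULL OUTER JOIN fo_demo_b b ON a.k = b.k;
-- Expected five rows (ordering aside):
--   1     NULL     big side only
--   2     2        matched
--   NULL  NULL     a's NULL key: joins nothing
--   NULL  3        small side only
--   NULL  NULL     b's NULL key: joins nothing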
Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1716,7 +1716,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) @@ -1874,7 +1874,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 diff --git ql/src/test/results/clientpositive/join47.q.out ql/src/test/results/clientpositive/join47.q.out index 4a13df8..67ae836 100644 --- ql/src/test/results/clientpositive/join47.q.out +++ ql/src/test/results/clientpositive/join47.q.out @@ -1341,7 +1341,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1446,7 +1446,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/join6.q.out ql/src/test/results/clientpositive/join6.q.out index 978cc45..69818e8 100644 --- ql/src/test/results/clientpositive/join6.q.out +++ ql/src/test/results/clientpositive/join6.q.out @@ -81,7 +81,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/join7.q.out ql/src/test/results/clientpositive/join7.q.out index 91c4eb7..ac65f34 100644 --- ql/src/test/results/clientpositive/join7.q.out +++ ql/src/test/results/clientpositive/join7.q.out @@ -107,7 +107,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 0 to 2 keys: 0 _col0 (type: string) diff --git ql/src/test/results/clientpositive/join_filters_overlap.q.out ql/src/test/results/clientpositive/join_filters_overlap.q.out index db0b26d..c9079d7 100644 --- ql/src/test/results/clientpositive/join_filters_overlap.q.out +++ ql/src/test/results/clientpositive/join_filters_overlap.q.out @@ -726,7 +726,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 1 to 2 Left Outer Join 0 to 3 filter mappings: diff --git ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out index 89b7169..4a10d24 100644 --- ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out +++ ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out @@ -1135,10 +1135,10 @@ STAGE PLANS: selectExpressions: StringGroupColConcatStringScalar(col 1:string, val updated) -> 6:string Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:struct native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 6, 2] + valueColumns: 0:string, 6:string, 2:string Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -1333,10 +1333,10 @@ STAGE PLANS: projectedOutputColumnNums: [4, 2, 3] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:struct native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2, 3] + valueColumns: 2:string, 3:string Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -1501,11 +1501,10 @@ STAGE PLANS: projectedOutputColumnNums: [] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:string, 1:string, 2:string, 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1, 2, 3] - valueColumnNums: [] + partitionColumns: 0:string, 1:string, 2:string, 3:string Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -1531,11 +1530,11 @@ STAGE PLANS: vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 3, 0, 1] + keyColumns: 2:string, 3:string, 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2, 3, 0, 1] - valueColumnNums: [4] + partitionColumns: 2:string, 3:string, 0:string, 1:string + valueColumns: 4:struct Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -1580,11 +1579,10 @@ STAGE PLANS: projectedOutputColumnNums: [] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:string, 1:string, 2:string, 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1, 2, 3] - valueColumnNums: [] + partitionColumns: 0:string, 1:string, 2:string, 3:string Select Vectorization: className: VectorSelectOperator native: true @@ -1614,6 +1612,9 @@ STAGE PLANS: className: VectorAppMasterEventOperator native: true Reducer 3 + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -1919,11 +1920,11 @@ STAGE PLANS: selectExpressions: StringGroupColConcatStringScalar(col 1:string, val updated) -> 6:string Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:struct native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [0, 6, 2] + partitionColumns: 5:int + valueColumns: 0:string, 6:string, 2:string Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -2118,11 +2119,11 
@@ STAGE PLANS: projectedOutputColumnNums: [4, 2, 3] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:struct native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [2, 3] + partitionColumns: 5:int + valueColumns: 2:string, 3:string Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -2288,11 +2289,10 @@ STAGE PLANS: projectedOutputColumnNums: [] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:string, 1:string, 2:string, 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1, 2, 3] - valueColumnNums: [] + partitionColumns: 0:string, 1:string, 2:string, 3:string Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -2318,11 +2318,11 @@ STAGE PLANS: vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 3, 0, 1] + keyColumns: 2:string, 3:string, 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2, 3, 0, 1] - valueColumnNums: [4] + partitionColumns: 2:string, 3:string, 0:string, 1:string + valueColumns: 4:struct Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -2367,11 +2367,10 @@ STAGE PLANS: projectedOutputColumnNums: [] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:string, 1:string, 2:string, 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1, 2, 3] - valueColumnNums: [] + partitionColumns: 0:string, 1:string, 2:string, 3:string Select Vectorization: className: VectorSelectOperator native: true @@ -2401,6 +2400,9 @@ STAGE PLANS: className: VectorAppMasterEventOperator native: true Reducer 3 + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/auto_join_filters.q.out ql/src/test/results/clientpositive/llap/auto_join_filters.q.out index 540612d..895fa4d 100644 --- ql/src/test/results/clientpositive/llap/auto_join_filters.q.out +++ ql/src/test/results/clientpositive/llap/auto_join_filters.q.out @@ -54,6 +54,153 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4939870 +Warning: Map Join MAPJOIN[14][bigTable=?] 
in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Reducer 4' is a cross product +PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + filter predicates: + 0 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + value expressions: _col0 (type: int), _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + 
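In the plan above the ON clause contains only single-side conditions, so there is no equi-join key at all: the join degenerates to a cross product (hence the two warnings) and the conditions are retained as per-side "filter predicates". An outer join has to evaluate such predicates at emit time rather than push them below the join, since filtering the inputs early would drop rows the join is required to preserve. The contrast is easiest to see side by side (t1/t2 illustrative):

-- ON-clause condition: kept as a join-time filter predicate; every t1 row
-- still appears, null-extended whenever the key or the predicate fails.
EXPLAIN
SELECT * FROM t1 FULL OUTER JOIN t2 ON (t1.k = t2.k AND t1.v > 50);

-- WHERE-clause condition: applied after the join, so it can eliminate
-- null-extended rows and lets the optimizer simplify the outer join.
EXPLAIN
SELECT * FROM t1 FULL OUTER JOIN t2 ON (t1.k = t2.k) WHERE t1.v > 50;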
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + filter predicates: + 0 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[14][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Reducer 4' is a cross product +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -198,6 +345,180 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4939870 +PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, 
_col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + filter predicates: + 0 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + filter predicates: + 0 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 
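The Tez shape above is the interesting part of this golden file: Map 1 streams the big side through the FULL OUTER Map Join, Reducer 4 re-joins the same inputs with FullOuterIntersect: true to recover the small-table-only rows, and Union 2 stitches the branches together ahead of the aggregate. Conceptually this mirrors the textbook decomposition of a full outer join, sketched below in plain HiveQL (a/b illustrative; for an equi-key the IS NULL probe is a sound no-match test, because a NULL key can never have matched):

-- every a-row, matched or null-extended ...
SELECT a.key, a.value, b.key, b.value
FROM a LEFT OUTER JOIN b ON a.key = b.key
UNION ALL
-- ... plus the b-rows that found no a-partner
SELECT CAST(NULL AS INT), CAST(NULL AS INT), b.key, b.value
FROM b LEFT OUTER JOIN a ON a.key = b.key
WHERE a.key IS NULL;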
8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -340,6 +661,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4939870 +Warning: Map Join MAPJOIN[14][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[16][bigTable=?] 
in task 'Reducer 4' is a cross product +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -484,6 +816,180 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4939870 +PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + filter predicates: + 0 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + filter predicates: + 0 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key 
> 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out index 04da1f2..1e58597 100644 --- ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out +++ ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out @@ -188,6 +188,174 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4543526 +PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: 
+                        1 Map 5
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: hash(_col0,_col1,_col2,_col3) (type: int)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: sum(_col0)
+                          mode: hash
+                          outputColumnNames: _col0
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: bigint)
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    value expressions: _col1 (type: int)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col1 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col1 (type: int)
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: int)
+                    Reduce Output Operator
+                      key expressions: _col1 (type: int)
+                      sort order: +
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: int)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int)
+                outputColumnNames: _col0, _col1
+                Map Join Operator
+                  condition map:
+                       Full Outer Join 0 to 1
+                  keys:
+                    0 _col0 (type: int)
+                    1 _col1 (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  input vertices:
+                    1 Map 5
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  FullOuterIntersect: true
+                  Select Operator
+                    expressions: hash(_col0,_col1,_col2,_col3) (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: sum(_col0)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: bigint)
+        Union 2 
+            Vertex: Union 2
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4543526
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4543526
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4543526
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4543526
 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@myinput1
diff --git ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out
index 543fccd..3c53b18 100644
--- ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out
+++ ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out
@@ -235,25 +235,25 @@ STAGE PLANS:
                     0 _col0 (type: int)
                     1 _col0 (type: int)
                   outputColumnNames: _col0, _col1, _col3
-                  Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 396 Data size: 73656 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
                     expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
                     outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 396 Data size: 73656 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
                       sort order: +++
-                      Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 396 Data size: 73656 Basic stats: COMPLETE Column stats: COMPLETE
         Reducer 3 
             Execution mode: llap
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 396 Data size: 73656 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 396 Data size: 73656 Basic stats: COMPLETE
Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -823,15 +823,15 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 73656 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 73656 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) sort order: +++ - Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 73656 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 3 @@ -840,10 +840,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 73656 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 73656 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1436,7 +1436,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 195 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 198 Data size: 1584 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -1565,7 +1565,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 4 - Statistics: Num rows: 195 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 198 Data size: 1584 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -1696,7 +1696,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 632 Data size: 5056 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 649 Data size: 5192 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -1731,16 +1731,16 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1 - Statistics: Num rows: 391 Data size: 1564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 1584 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 391 Data size: 1564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 1584 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 391 Data size: 1564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 
1584 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -1838,11 +1838,11 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 4 - Statistics: Num rows: 391 Data size: 1564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 1584 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 391 Data size: 1564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 1584 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1851,7 +1851,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 0 Map 1 - Statistics: Num rows: 632 Data size: 5056 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 649 Data size: 5192 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -2007,7 +2007,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 632 Data size: 5056 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 649 Data size: 5192 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -2042,16 +2042,16 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1 - Statistics: Num rows: 391 Data size: 1564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 1584 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 391 Data size: 1564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 1584 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 391 Data size: 1564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 1584 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -2149,11 +2149,11 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 4 - Statistics: Num rows: 391 Data size: 1564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 1584 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 391 Data size: 1564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 1584 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -2162,7 +2162,7 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 0 Map 1 - Statistics: Num rows: 632 Data size: 5056 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 649 Data size: 5192 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -2566,11 +2566,11 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2 - Statistics: Num rows: 391 Data size: 37145 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 37620 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), substr(_col2, 5) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 391 Data size: 37145 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 37620 Basic stats: COMPLETE 
Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col0 (type: int) @@ -2708,11 +2708,11 @@ STAGE PLANS: outputColumnNames: _col1, _col2 input vertices: 0 Map 2 - Statistics: Num rows: 391 Data size: 37145 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 37620 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), substr(_col2, 5) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 391 Data size: 37145 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 37620 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col0 (type: int) @@ -2866,14 +2866,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col1 (type: int) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 247 Data size: 25441 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 25750 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: int), _col2 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 247 Data size: 25441 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 25750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 247 Data size: 25441 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 25750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2949,14 +2949,14 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3 input vertices: 1 Reducer 3 - Statistics: Num rows: 247 Data size: 25441 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 25750 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: int), _col2 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 247 Data size: 25441 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 25750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 247 Data size: 25441 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 25750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3139,14 +3139,14 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col0 (type: int) outputColumnNames: _col0, _col1, _col4 - Statistics: Num rows: 619 Data size: 115134 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 627 Data size: 116622 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 619 Data size: 115134 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 627 Data size: 116622 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 619 Data size: 115134 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 627 Data size: 116622 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3202,14 +3202,14 @@ STAGE PLANS: input vertices: 1 Map 2 2 Map 3 - Statistics: Num rows: 619 Data size: 115134 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 627 Data size: 116622 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 619 Data size: 115134 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 627 Data size: 116622 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 619 Data size: 115134 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 627 Data size: 116622 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3304,12 +3304,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 391 Data size: 37145 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 37620 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 391 Data size: 37145 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 37620 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -3362,14 +3362,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 618 Data size: 114948 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 626 Data size: 116436 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 618 Data size: 114948 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 626 Data size: 116436 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 618 Data size: 114948 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 626 Data size: 116436 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3422,7 +3422,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 2 - Statistics: Num rows: 391 Data size: 37145 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 37620 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -3432,14 +3432,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 1 Map 3 - Statistics: Num rows: 618 Data size: 114948 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 626 Data size: 116436 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 618 Data size: 114948 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 626 Data size: 116436 Basic stats: COMPLETE Column stats: COMPLETE File Output 
Operator compressed: false - Statistics: Num rows: 618 Data size: 114948 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 626 Data size: 116436 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3586,14 +3586,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 195 Data size: 20085 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 198 Data size: 20394 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 195 Data size: 20085 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 198 Data size: 20394 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 195 Data size: 20085 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 198 Data size: 20394 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3675,14 +3675,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Reducer 2 - Statistics: Num rows: 195 Data size: 20085 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 198 Data size: 20394 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 195 Data size: 20085 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 198 Data size: 20394 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 195 Data size: 20085 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 198 Data size: 20394 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3799,14 +3799,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 247 Data size: 25441 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 25750 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 247 Data size: 25441 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 25750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 247 Data size: 25441 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 25750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3882,14 +3882,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Reducer 2 - Statistics: Num rows: 247 Data size: 25441 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 25750 Basic stats: COMPLETE Column stats: COMPLETE Select Operator 
expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 247 Data size: 25441 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 25750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 247 Data size: 25441 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 25750 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4178,14 +4178,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 73656 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 73656 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 73656 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4258,14 +4258,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 73656 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 73656 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 73656 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4293,7 +4293,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4309,11 +4309,18 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data 
size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE @@ -4329,7 +4336,7 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: c @@ -4355,16 +4362,16 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 809 Data size: 6472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 809 Data size: 6472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -4372,17 +4379,17 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1326 Data size: 10608 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col3 (type: int) + expressions: _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1326 Data size: 10608 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1326 Data size: 10608 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4407,8 +4414,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (CUSTOM_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4423,41 +4430,12 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) - Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: c - Statistics: 
Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs Map 4 @@ -4480,6 +4458,25 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -4489,15 +4486,32 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col1 (type: string) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col2 + Statistics: Num rows: 809 Data size: 6472 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 809 Data size: 6472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1326 Data size: 10608 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col3 (type: int) + expressions: _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1326 Data size: 10608 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1326 Data size: 10608 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4594,14 +4608,14 @@ STAGE PLANS: 0 _col0 (type: int), _col2 (type: string) 1 _col0 (type: int), _col2 (type: string) outputColumnNames: _col0, _col1, _col4 - Statistics: Num rows: 391 Data size: 72726 Basic 
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 73656 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 73656 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 73656 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4698,14 +4712,14 @@ STAGE PLANS: 0 _col0 (type: int), _col2 (type: string) 1 _col0 (type: int), _col2 (type: string) outputColumnNames: _col0, _col1, _col4 - Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 73656 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 73656 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 396 Data size: 73656 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out index fa90ccd..8b8a31a 100644 --- ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out +++ ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out @@ -178,7 +178,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -194,11 +194,18 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE @@ -214,7 +221,7 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: c @@ -240,16 +247,16 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 
_col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 809 Data size: 6472 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 809 Data size: 6472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -257,17 +264,17 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1326 Data size: 10608 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col3 (type: int) + expressions: _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1326 Data size: 10608 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1326 Data size: 10608 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -292,8 +299,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (CUSTOM_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -308,41 +315,12 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) - Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) + key 
expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs Map 4 @@ -365,6 +343,25 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -374,15 +371,32 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col1 (type: string) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col2 + Statistics: Num rows: 809 Data size: 6472 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 809 Data size: 6472 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1326 Data size: 10608 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col3 (type: int) + expressions: _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1326 Data size: 10608 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1309 Data size: 10472 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1326 Data size: 10608 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -670,10 +684,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 270 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 270 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -724,10 +738,10 @@ STAGE 
PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 2 - Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 270 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 270 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -826,10 +840,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 270 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 270 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -880,10 +894,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 2 - Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 270 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 270 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -982,10 +996,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 270 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 270 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1055,10 +1069,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 0 Map 1 - Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 270 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 267 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 270 Data size: 2160 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/bucket_map_join_tez_empty.q.out ql/src/test/results/clientpositive/llap/bucket_map_join_tez_empty.q.out index 08df574..33825da 100644 --- ql/src/test/results/clientpositive/llap/bucket_map_join_tez_empty.q.out +++ ql/src/test/results/clientpositive/llap/bucket_map_join_tez_empty.q.out @@ -71,10 +71,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, 
_col2, _col3 input vertices: 1 Map 2 - Statistics: Num rows: 3 Data size: 546 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 546 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out index 61db738..bce5b09 100644 --- ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out +++ ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out @@ -483,22 +483,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 149 Data size: 85004 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 14155 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 142 Data size: 81010 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 14155 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 142 Data size: 81010 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 28161 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 142 Data size: 81010 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 28161 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col1 (type: string) auto parallelism: true @@ -569,17 +569,17 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 Position of Big Table: 1 - Statistics: Num rows: 156 Data size: 89111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 30977 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToString(_col0) (type: string), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 156 Data size: 89111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 30977 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 156 Data size: 89111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 30977 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -609,7 +609,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: key, value1, value2 - Statistics: Num rows: 156 Data size: 89111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 30977 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') mode: hash @@ -903,22 +903,22 @@ STAGE PLANS: Map Operator 
Tree: TableScan alias: b - Statistics: Num rows: 149 Data size: 85004 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 14155 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 142 Data size: 81010 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 14155 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 142 Data size: 81010 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 28161 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 142 Data size: 81010 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 28161 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col1 (type: string) auto parallelism: true @@ -989,17 +989,17 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 Position of Big Table: 1 - Statistics: Num rows: 156 Data size: 89111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 30977 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToString(_col0) (type: string), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 156 Data size: 89111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 30977 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 156 Data size: 89111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 30977 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1029,7 +1029,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: key, value1, value2 - Statistics: Num rows: 156 Data size: 89111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 30977 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') mode: hash diff --git ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out index d6f541d..1ca074b 100644 --- ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out +++ ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out @@ -119,22 +119,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 149 Data size: 85004 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 14155 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 142 Data size: 81010 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 14155 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 142 Data 
size: 81010 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 14155 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 142 Data size: 81010 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 14155 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: _col1 (type: string) auto parallelism: true @@ -283,17 +283,17 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 Position of Big Table: 0 - Statistics: Num rows: 156 Data size: 89111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 15570 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToString(_col0) (type: string), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 156 Data size: 89111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 15570 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 156 Data size: 89111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 15570 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -323,7 +323,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: key, value1, value2 - Statistics: Num rows: 156 Data size: 89111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 15570 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') mode: hash @@ -545,22 +545,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 149 Data size: 85004 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 14155 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 142 Data size: 81010 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 14155 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 142 Data size: 81010 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 14155 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 142 Data size: 81010 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 14155 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: _col1 (type: string) auto parallelism: true @@ -709,17 +709,17 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 Position of Big Table: 0 - Statistics: Num rows: 156 Data size: 89111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 15570 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToString(_col0) (type: string), _col1 (type: string), 
_col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 156 Data size: 89111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 15570 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 156 Data size: 89111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 15570 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -749,7 +749,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: key, value1, value2 - Statistics: Num rows: 156 Data size: 89111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 15570 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') mode: hash @@ -988,22 +988,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 149 Data size: 85004 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 14155 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 142 Data size: 81010 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 14155 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 142 Data size: 81010 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 14155 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 142 Data size: 81010 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 14155 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: _col1 (type: string) auto parallelism: true @@ -1201,17 +1201,17 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 Position of Big Table: 1 - Statistics: Num rows: 163 Data size: 93968 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 15570 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToString(_col0) (type: string), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 163 Data size: 93968 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 15570 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 163 Data size: 93968 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 15570 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1241,7 +1241,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: key, value1, value2 - Statistics: Num rows: 163 Data size: 93968 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data 
size: 15570 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') mode: hash diff --git ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out index e37a618..34de5b1 100644 --- ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out +++ ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out @@ -221,22 +221,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 149 Data size: 85004 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 14155 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 142 Data size: 81010 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 14155 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 142 Data size: 81010 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 28161 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 142 Data size: 81010 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 28161 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col1 (type: string) auto parallelism: true @@ -307,17 +307,17 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col4 Position of Big Table: 1 - Statistics: Num rows: 156 Data size: 89111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 30977 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToString(_col0) (type: string), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 156 Data size: 89111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 30977 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 156 Data size: 89111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 30977 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -347,7 +347,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: key, value1, value2 - Statistics: Num rows: 156 Data size: 89111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 30977 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') mode: hash @@ -647,22 +647,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 149 Data size: 85004 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 14155 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 142 Data size: 81010 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 149 Data size: 14155 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 142 Data size: 81010 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 28161 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 142 Data size: 81010 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 149 Data size: 28161 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col1 (type: string) auto parallelism: true @@ -733,17 +733,17 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col4 Position of Big Table: 1 - Statistics: Num rows: 156 Data size: 89111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 30977 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToString(_col0) (type: string), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 156 Data size: 89111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 30977 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 156 Data size: 89111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 30977 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -773,7 +773,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: key, value1, value2 - Statistics: Num rows: 156 Data size: 89111 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 163 Data size: 30977 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') mode: hash diff --git ql/src/test/results/clientpositive/llap/correlationoptimizer1.q.out ql/src/test/results/clientpositive/llap/correlationoptimizer1.q.out index b138a2d..1c6ebb3 100644 --- ql/src/test/results/clientpositive/llap/correlationoptimizer1.q.out +++ ql/src/test/results/clientpositive/llap/correlationoptimizer1.q.out @@ -2203,7 +2203,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -2345,7 +2345,291 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 525 Data size: 3526 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce 
Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +652447 510 +PREHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: y + 
Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 525 Data size: 3526 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +652447 510 +PREHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT 
x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out index 64248e7..57faf1d 100644 --- ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out +++ ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out @@ -1093,7 +1093,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1263,7 +1263,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1339,20 +1339,18 @@ POSTHOOK: Input: default@src1 #### A masked pattern was here #### 12744278 500 652447 25 PREHOOK: query: EXPLAIN -SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) -FROM (SELECT a.key AS key, count(1) AS cnt +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b - ON (a.key = b.key) - GROUP BY a.key) tmp + ON (a.key = b.key)) tmp PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN -SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) -FROM (SELECT a.key AS key, count(1) AS cnt +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), 
SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b - ON (a.key = b.key) - GROUP BY a.key) tmp + ON (a.key = b.key)) tmp POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1364,140 +1362,126 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: x - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + aggregations: count(value) keys: key (type: string) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: y - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + aggregations: count(value) keys: key (type: string) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: 
count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 262 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 262 Data size: 1890 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: hash(_col0) (type: int), hash(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 262 Data size: 1890 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(_col0), sum(_col1) + aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 5 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), sum(VALUE._col1) + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: 
COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -1505,42 +1489,38 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) -FROM (SELECT a.key AS key, count(1) AS cnt +PREHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b - ON (a.key = b.key) - GROUP BY a.key) tmp + ON (a.key = b.key)) tmp PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@src1 #### A masked pattern was here #### -POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) -FROM (SELECT a.key AS key, count(1) AS cnt +POSTHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b - ON (a.key = b.key) - GROUP BY a.key) tmp + ON (a.key = b.key)) tmp POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 #### A masked pattern was here #### -12744278 310 +12744278 500 652447 25 PREHOOK: query: EXPLAIN -SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) -FROM (SELECT a.key AS key, count(1) AS cnt +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b - ON (a.key = b.key) - GROUP BY a.key) tmp + ON (a.key = b.key)) tmp PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN -SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) -FROM (SELECT a.key AS key, count(1) AS cnt +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b - ON (a.key = b.key) - GROUP BY a.key) tmp + ON 
(a.key = b.key)) tmp POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1552,128 +1532,300 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: x - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + aggregations: count(value) keys: key (type: string) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: y - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + aggregations: count(value) keys: key (type: string) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce 
Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 262 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 262 Data size: 1890 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: hash(_col0) (type: int), hash(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 262 Data size: 1890 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(_col0), sum(_col1) + aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 5 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), sum(VALUE._col1) + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +12744278 500 652447 25 +PREHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data 
size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 262 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1715,6 +1867,932 @@ POSTHOOK: Input: default@src1 #### A masked pattern was here #### 12744278 310 PREHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: 
_col0 + Statistics: Num rows: 262 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +12744278 310 +PREHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), 
SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 262 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE 
Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +12744278 310 +PREHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +PREHOOK: 
type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 262 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + 
Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +12744278 310 +PREHOOK: query: EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y 
ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Group By Operator + aggregations: count(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, 
_col2, _col3 + Statistics: Num rows: 30 Data size: 8070 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 30 Data size: 8070 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +1711763 3531902962 1711763 37 +PREHOOK: query: EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(HASH(key1)), 
SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Group By Operator + aggregations: count(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reducer 3 + Execution 
mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 30 Data size: 8070 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 30 Data size: 8070 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +1711763 3531902962 1711763 37 +PREHOOK: query: EXPLAIN SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) 
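-- Annotation: the two runs above EXPLAIN and execute an identical pure inner-join statement,
-- and both return 1711763 3531902962 1711763 37. The q file apparently repeats the statement
-- under different session settings (any SET statements fall outside this hunk), and a third
-- EXPLAIN of the same statement is cut off at the hunk boundary just above. The control-case
-- statement, restated: no FULL OUTER operator is involved, so the plans and the summed
-- hashes must stay identical across the repeated runs.
SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2))
FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2
      FROM (SELECT x.key AS key, x.value AS val
            FROM src1 x JOIN src y ON (x.key = y.key)) a
      JOIN (SELECT z.key AS key, count(z.value) AS cnt
            FROM src1 z GROUP BY z.key) b
      ON (a.key = b.key)) tmp;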
a diff --git ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out index 3c9b6fc..acafc0d 100644 --- ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out +++ ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out @@ -1605,8 +1605,8 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 0 to 1 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) @@ -1767,8 +1767,332 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 0 to 1 + Full Outer Join 1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0), sum(_col1) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) + GROUP BY y.key) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) + GROUP BY y.key) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 
+POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +21 14 +PREHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) + GROUP BY y.key) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) + GROUP BY y.key) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + Full Outer Join 1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0), sum(_col1) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) + GROUP BY y.key) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) + GROUP BY y.key) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +21 14 +PREHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) + GROUP BY y.key) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) + GROUP BY y.key) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: y + 
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) diff --git ql/src/test/results/clientpositive/llap/cross_prod_1.q.out ql/src/test/results/clientpositive/llap/cross_prod_1.q.out index fd03fe5..a7d9a94 100644 --- ql/src/test/results/clientpositive/llap/cross_prod_1.q.out +++ ql/src/test/results/clientpositive/llap/cross_prod_1.q.out @@ -1971,7 +1971,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 diff --git ql/src/test/results/clientpositive/llap/explainuser_4.q.out ql/src/test/results/clientpositive/llap/explainuser_4.q.out index c4fdc18..c7544a4 100644 --- ql/src/test/results/clientpositive/llap/explainuser_4.q.out +++ ql/src/test/results/clientpositive/llap/explainuser_4.q.out @@ -270,7 +270,7 @@ Stage-0 <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_10] Map Join Operator [MAPJOIN_17] (rows=1501 width=215) - Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] + Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),DynamicPartitionHashJoin:true,HybridGraceHashJoin:true,Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] <-Map 4 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_7] PartitionCols:_col2 @@ -351,7 +351,7 @@ Stage-0 Group By Operator [GBY_10] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] Map Join Operator [MAPJOIN_19] (rows=1501 width=215) - Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true + Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),DynamicPartitionHashJoin:true,HybridGraceHashJoin:true <-Map 4 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_7] PartitionCols:_col0 @@ -431,7 +431,7 @@ Stage-0 Group By Operator [GBY_10] (rows=1501 width=215) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 Map Join Operator [MAPJOIN_21] (rows=1501 width=215) - Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true,Output:["_col0"] + 
Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),DynamicPartitionHashJoin:true,HybridGraceHashJoin:true,Output:["_col0"] <-Map 5 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_7] PartitionCols:_col0 diff --git ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out new file mode 100644 index 0000000..76d69d1 --- /dev/null +++ ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out @@ -0,0 +1,9653 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: 
query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: Lineage: fullouter_long_small_1a.key SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a.s_date SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * 
FROM fullouter_long_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.key SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.s_date SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: 
default@fullouter_long_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: 
_col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 
2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: 
bigint) + outputColumnNames: _col0 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 
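-- Annotation: the two EXPLAIN VECTORIZATION DETAIL runs above plan the same FULL OUTER join
-- two ways. The first is a Reducer-side Merge Join fed by SIMPLE_EDGEs; the second is the
-- broadcast shape this new golden file exercises: Map 5 (the small table) is broadcast to
-- both Map 1 and Reducer 4, the two Map Join passes feed Union 2, and the Reducer 4 pass is
-- flagged "FullOuterIntersect: true", presumably the pass that recovers small-table rows
-- left unmatched on the map side (the plan text does not spell the mechanism out). The big
-- and small key sets in this data are disjoint, so every result row carries NULL on one
-- side, and both variants must print the identical row set. The statement, restated;
-- vectorization is off for this run ("hive.vectorized.execution.enabled IS false" in the
-- plan header):
SELECT b.key, s.key, s.s_date
FROM fullouter_long_big_1a b
FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
ORDER BY b.key;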
+NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: 
default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL 
-2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: 
default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reduce Output Operator + key expressions: 
_col0 (type: smallint) + sort order: + + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: llap + LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint) + outputColumnNames: _col0 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 
+NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: 
Output: default@fullouter_long_small_1c +POSTHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 
173 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: llap + LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: 
Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL 
NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + 
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' 
OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: Lineage: fullouter_multikey_big_1a.key0 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a.key1 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key0 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key1 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: CREATE TABLE 
fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: Lineage: fullouter_multikey_small_1a.key0 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a.key1 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Lineage: 
fullouter_multikey_small_1a_nonull.key0 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key1 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +POSTHOOK: type: QUERY 
+POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 3 
+ Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 
-586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL 
NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 
+NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL 
-28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: Lineage: fullouter_multikey_big_1b.key0 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: 
fullouter_multikey_big_1b.key1 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key2 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: Lineage: fullouter_multikey_small_1b.key0 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key1 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key2 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.s_decimal SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:s_decimal, type:decimal(38,18), comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table 
fullouter_multikey_big_1b_txt compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 5 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + 
Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 5 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal 
FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM 
-0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 
+NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) 
+row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: Lineage: fullouter_string_big_1a.key SIMPLE [(fullouter_string_big_1a_txt)fullouter_string_big_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: Lineage: fullouter_string_big_1a_nonull.key SIMPLE [(fullouter_string_big_1a_nonull_txt)fullouter_string_big_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_txt 
+POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: Lineage: fullouter_string_small_1a.key SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_date SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_timestamp SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT 
+POSTHOOK: Input: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.key SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_date SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_timestamp SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table 
fullouter_string_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Reduce Output Operator + key 
expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 
11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL 
+WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 
13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + 
Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL 
-2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reduce 
Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 
2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reduce 
Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 
2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reduce Output Operator + key expressions: _col0 
(type: bigint) + sort order: + + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL 
-2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + 
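Reviewer note: every plan in this file splits the FULL OUTER MapJoin into two legs merged by Union 2, and the result blocks above show the standard NULL-key semantics: NULL never satisfies b.key = s.key, so each NULL-keyed row surfaces as a one-sided row, and the *_nonull table variants lose exactly those rows. A hedged restatement of the two-leg shape as plain SQL — this is a reviewer's logical equivalent for reading the plans, not the literal operator pipeline, and it assumes a Hive version with correlated NOT EXISTS support:

    -- FULL OUTER JOIN == LEFT OUTER JOIN plus the right-side anti-join rows.
    SELECT b.key, s.key, s.s_timestamp
    FROM fullouter_long_big_1b b
    LEFT OUTER JOIN fullouter_long_small_1b s ON b.key = s.key
    UNION ALL
    SELECT CAST(NULL AS SMALLINT), s.key, s.s_timestamp
    FROM fullouter_long_small_1b s
    WHERE NOT EXISTS (
      SELECT 1 FROM fullouter_long_big_1b b WHERE b.key = s.key
    );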
Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: llap + LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint) + outputColumnNames: _col0 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 
2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: llap + LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### 
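Reviewer note: in the result block that follows (as in the earlier ones), rows appear in lexicographic order of their printed form — all negative keys precede the positive ones and NULL sorts after digits — which suggests the q-file relies on the test driver's result sorting rather than on the ORDER BY itself; that is an inference from the output, not something visible in this hunk. If deterministic NULL placement were wanted from the query alone, Hive releases that support explicit NULL ordering could pin it:

    SELECT b.key, b.b_string, s.key, s.s_decimal
    FROM fullouter_long_big_1c b
    FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key
    ORDER BY b.key NULLS LAST;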
+POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 
-0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked 
pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM 
fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 
(BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, 
s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- 
Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + 
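For orientation: the plan above is the non-vectorized FULL OUTER MapJoin shape on Tez/LLAP that recurs throughout this file. Map 1 runs a broadcast Map Join against Map 5 (the small table), Reducer 4 runs a second Map Join against the same broadcast input marked FullOuterIntersect: true, and Union 2 merges both paths before Reducer 3 applies the final sort. A minimal sketch that reproduces this plan interactively (assuming the two tables already exist in the default database; the SET line is inferred from the plan's enabledConditionsNotMet entry):

SET hive.vectorized.execution.enabled=false;
EXPLAIN VECTORIZATION OPERATOR
SELECT b.key, s.key, s.s_date, s.s_timestamp
FROM fullouter_string_big_1a_nonull b
FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key
order by b.key;

The same Union-of-two-MapJoins pattern appears in every EXPLAIN below; only the key types and the statistics differ.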
+PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### 
A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM 
fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### 
+ Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, 
s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL 
NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL 
NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 
(type: smallint), _col1 (type: int) + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL 
-20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, 
_col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by 
b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: 
type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 5 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string)
+ 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE
+ FullOuterIntersect: true
+ HybridGraceHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: timestamp), _col1 (type: smallint)
+ sort order: ++
+ Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18))
+ Union 2
+ Vertex: Union 2
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2
+order by b.key0, b.key1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_multikey_big_1b
+PREHOOK: Input: default@fullouter_multikey_small_1b
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2
+order by b.key0, b.key1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_multikey_big_1b
+POSTHOOK: Input: default@fullouter_multikey_small_1b
+#### A masked pattern was here ####
+2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL
+2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL
+2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL
+2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL
+2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL
+2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000
+2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL
+2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL
+2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL
+2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL
+2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000
+2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL
+2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL
+NULL -6909 NULL NULL NULL NULL NULL
+NULL 21635 ANCO NULL NULL NULL NULL
+NULL NULL CCWYD NULL NULL NULL NULL
+NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000
+NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000
+NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000
+NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000
+NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 ZVEUKC -2312.814900000000000000
+NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000
+NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000
+NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000
+NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000
+NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000
+NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000
+NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000
+NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000
+NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000
+NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000
+NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000
+NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000
+NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000
+NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000
+NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000
+NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000
+NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000
+NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000
+NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000
+NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000
+NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000
+NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000
+NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000
+NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000
+NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000
+NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000
+NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000
+NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000
+NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000
+NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000
+NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000
+NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000
+NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000
+NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000
+NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000
+NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000
+NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000
+NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000
+NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000
+NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000
+NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000
+NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000
+NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000
+NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000
+NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000
+NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000
+NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000
+NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000
+NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000
+NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000
+NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000
+NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000
+NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000
+NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000
+NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000
+NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000
+NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000
+NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000
+NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000
+NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000
+NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000
+NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000
+NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000
+NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000
+NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000
+NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000
+NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000
+NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000
+NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000
+NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000
+NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000
+NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000
+NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000
+NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000
+NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000
+NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000
+NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000
+NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000
+NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000
+NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000
+NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000
+NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000
+NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000
+NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000
+NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000
+NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000
+NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000
+NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000
+NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000
+NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000
+NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000
+NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000
+NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000
+NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000
+NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000
+NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000
+NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000
+NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000
+NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000
+NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000
+NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000
+NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000
+NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000
+NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000
+NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000
+NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000
+NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000
+NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000
+NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000
+NULL NULL NULL NULL NULL NULL -2.400000000000000000
+NULL NULL NULL NULL NULL NULL -2207.300000000000000000
+NULL NULL NULL NULL NULL NULL NULL
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: false
+ enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: bigint), s_date (type: date)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: date)
+ Execution mode: llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: bigint)
+ 1 KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: date)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_long_big_1a
+PREHOOK: Input: default@fullouter_long_small_1a
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_long_big_1a
+POSTHOOK: Input: default@fullouter_long_small_1a
+#### A masked pattern was here ####
+-5206670856103795573 NULL NULL
+-5310365297525168078 NULL NULL
+-6187919478609154811 NULL NULL
+-6187919478609154811 NULL NULL
+-6187919478609154811 NULL NULL
+-6187919478609154811 NULL NULL
+-8460550397108077433 NULL NULL
+1569543799237464101 NULL NULL
+3313583664488247651 NULL NULL
+968819023021777205 NULL NULL
+NULL -1339636982994067311 2000-06-20
+NULL -1339636982994067311 2008-12-03
+NULL -2098090254092150988 1817-03-12
+NULL -2098090254092150988 2163-05-26
+NULL -2098090254092150988 2219-12-23
+NULL -2184423060953067642 1853-07-06
+NULL -2184423060953067642 1880-10-06
+NULL -2575185053386712613 1809-07-12
+NULL -2575185053386712613 2105-01-21
+NULL -2688622006344936758 1948-10-15
+NULL -2688622006344936758 2129-01-11
+NULL -327698348664467755 2222-10-15
+NULL -3655445881497026796 2108-08-16
+NULL -4224290881682877258 1813-05-17
+NULL -4224290881682877258 2120-01-16
+NULL -4224290881682877258 2185-07-08
+NULL -4961171400048338491 2196-08-10
+NULL -5706981533666803767 1800-09-20
+NULL -5706981533666803767 2151-06-09
+NULL -5754527700632192146 1958-07-15
+NULL -614848861623872247 2101-05-25
+NULL -614848861623872247 2112-11-09
+NULL -6784441713807772877 1845-02-16
+NULL -6784441713807772877 2054-06-17
+NULL -7707546703881534780 2134-08-20
+NULL 214451696109242839 1855-05-12
+NULL 214451696109242839 1977-01-04
+NULL 214451696109242839 2179-04-18
+NULL 2438535236662373438 1881-09-16
+NULL 2438535236662373438 1916-01-10
+NULL 2438535236662373438 2026-06-23
+NULL 3845554233155411208 1805-11-10
+NULL 3845554233155411208 2264-04-05
+NULL 3873405809071478736 1918-11-20
+NULL 3873405809071478736 2034-06-09
+NULL 3873405809071478736 2164-04-23
+NULL 3905351789241845882 1866-07-28
+NULL 3905351789241845882 2045-12-05
+NULL 434940853096155515 2275-02-08
+NULL 4436884039838843341 2031-05-23
+NULL 5246983111579595707 1817-07-01
+NULL 5246983111579595707 2260-05-11
+NULL 5252407779338300447 2039-03-10
+NULL 5252407779338300447 2042-04-26
+NULL 6049335087268933751 2086-12-17
+NULL 6049335087268933751 2282-06-09
+NULL 7297177530102477725 1921-05-11
+NULL 7297177530102477725 1926-04-12
+NULL 7297177530102477725 2125-08-26
+NULL 7937120928560087303 2083-03-14
+NULL 8755921538765428593 1827-05-01
+NULL NULL 2024-01-23
+NULL NULL 2098-02-10
+NULL NULL 2242-02-08
+NULL NULL NULL
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: false
+ enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: bigint), s_date (type: date)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: date)
+ Execution mode: llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: bigint)
+ 1 KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: date)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_long_big_1a_nonull
+PREHOOK: Input: default@fullouter_long_small_1a
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_long_big_1a_nonull
+POSTHOOK: Input: default@fullouter_long_small_1a
+#### A masked pattern was here ####
+-5206670856103795573 NULL NULL
+-5310365297525168078 NULL NULL
+-6187919478609154811 NULL NULL
+-6187919478609154811 NULL NULL
+-6187919478609154811 NULL NULL
+-6187919478609154811 NULL NULL
+-8460550397108077433 NULL NULL
+1569543799237464101 NULL NULL
+3313583664488247651 NULL NULL
+968819023021777205 NULL NULL
+NULL -1339636982994067311 2000-06-20
+NULL -1339636982994067311 2008-12-03
+NULL -2098090254092150988 1817-03-12
+NULL -2098090254092150988 2163-05-26
+NULL -2098090254092150988 2219-12-23
+NULL -2184423060953067642 1853-07-06
+NULL -2184423060953067642 1880-10-06
+NULL -2575185053386712613 1809-07-12
+NULL -2575185053386712613 2105-01-21
+NULL -2688622006344936758 1948-10-15
+NULL -2688622006344936758 2129-01-11
+NULL -327698348664467755 2222-10-15
+NULL -3655445881497026796 2108-08-16
+NULL -4224290881682877258 1813-05-17
+NULL -4224290881682877258 2120-01-16
+NULL -4224290881682877258 2185-07-08
+NULL -4961171400048338491 2196-08-10
+NULL -5706981533666803767 1800-09-20
+NULL -5706981533666803767 2151-06-09
+NULL -5754527700632192146 1958-07-15
+NULL -614848861623872247 2101-05-25
+NULL -614848861623872247 2112-11-09
+NULL -6784441713807772877 1845-02-16
+NULL -6784441713807772877 2054-06-17
+NULL -7707546703881534780 2134-08-20
+NULL 214451696109242839 1855-05-12
+NULL 214451696109242839 1977-01-04
+NULL 214451696109242839 2179-04-18
+NULL 2438535236662373438 1881-09-16
+NULL 2438535236662373438 1916-01-10
+NULL 2438535236662373438 2026-06-23
+NULL 3845554233155411208 1805-11-10
+NULL 3845554233155411208 2264-04-05
+NULL 3873405809071478736 1918-11-20
+NULL 3873405809071478736 2034-06-09
+NULL 3873405809071478736 2164-04-23
+NULL 3905351789241845882 1866-07-28
+NULL 3905351789241845882 2045-12-05
+NULL 434940853096155515 2275-02-08
+NULL 4436884039838843341 2031-05-23
+NULL 5246983111579595707 1817-07-01
+NULL 5246983111579595707 2260-05-11
+NULL 5252407779338300447 2039-03-10
+NULL 5252407779338300447 2042-04-26
+NULL 6049335087268933751 2086-12-17
+NULL 6049335087268933751 2282-06-09
+NULL 7297177530102477725 1921-05-11
+NULL 7297177530102477725 1926-04-12
+NULL 7297177530102477725 2125-08-26
+NULL 7937120928560087303 2083-03-14
+NULL 8755921538765428593 1827-05-01
+NULL NULL 2024-01-23
+NULL NULL 2098-02-10
+NULL NULL 2242-02-08
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: false
+ enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: bigint), s_date (type: date)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: date)
+ Execution mode: llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: bigint)
+ 1 KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: date)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_long_big_1a
+PREHOOK: Input: default@fullouter_long_small_1a_nonull
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_long_big_1a
+POSTHOOK: Input: default@fullouter_long_small_1a_nonull
+#### A masked pattern was here ####
+-5206670856103795573 NULL NULL
+-5310365297525168078 NULL NULL
+-6187919478609154811 NULL NULL
+-6187919478609154811 NULL NULL
+-6187919478609154811 NULL NULL
+-6187919478609154811 NULL NULL
+-8460550397108077433 NULL NULL
+1569543799237464101 NULL NULL
+3313583664488247651 NULL NULL
+968819023021777205 NULL NULL
+NULL -1339636982994067311 2000-06-20
+NULL -1339636982994067311 2008-12-03
+NULL -2098090254092150988 1817-03-12
+NULL -2098090254092150988 2163-05-26
+NULL -2098090254092150988 2219-12-23
+NULL -2184423060953067642 1853-07-06
+NULL -2184423060953067642 1880-10-06
+NULL -2575185053386712613 1809-07-12
+NULL -2575185053386712613 2105-01-21
+NULL -2688622006344936758 1948-10-15
+NULL -2688622006344936758 2129-01-11
+NULL -327698348664467755 2222-10-15
+NULL -3655445881497026796 2108-08-16
+NULL -4224290881682877258 1813-05-17
+NULL -4224290881682877258 2120-01-16
+NULL -4224290881682877258 2185-07-08
+NULL -4961171400048338491 2196-08-10
+NULL -5706981533666803767 1800-09-20
+NULL -5706981533666803767 2151-06-09
+NULL -5754527700632192146 1958-07-15
+NULL -614848861623872247 2101-05-25
+NULL -614848861623872247 2112-11-09
+NULL -6784441713807772877 1845-02-16
+NULL -6784441713807772877 2054-06-17
+NULL -7707546703881534780 2134-08-20
+NULL 214451696109242839 1855-05-12
+NULL 214451696109242839 1977-01-04
+NULL 214451696109242839 2179-04-18
+NULL 2438535236662373438 1881-09-16
+NULL 2438535236662373438 1916-01-10
+NULL 2438535236662373438 2026-06-23
+NULL 3845554233155411208 1805-11-10
+NULL 3845554233155411208 2264-04-05
+NULL 3873405809071478736 1918-11-20
+NULL 3873405809071478736 2034-06-09
+NULL 3873405809071478736 2164-04-23
+NULL 3905351789241845882 1866-07-28
+NULL 3905351789241845882 2045-12-05
+NULL 434940853096155515 2275-02-08
+NULL 4436884039838843341 2031-05-23
+NULL 5246983111579595707 1817-07-01
+NULL 5246983111579595707 2260-05-11
+NULL 5252407779338300447 2039-03-10
+NULL 5252407779338300447 2042-04-26
+NULL 6049335087268933751 2086-12-17
+NULL 6049335087268933751 2282-06-09
+NULL 7297177530102477725 1921-05-11
+NULL 7297177530102477725 1926-04-12
+NULL 7297177530102477725 2125-08-26
+NULL 7937120928560087303 2083-03-14
+NULL 8755921538765428593 1827-05-01
+NULL NULL NULL
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: false
+ enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: bigint), s_date (type: date)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: date)
+ Execution mode: llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: bigint)
+ 1 KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: date)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_long_big_1a_nonull
+PREHOOK: Input: default@fullouter_long_small_1a_nonull
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_long_big_1a_nonull
+POSTHOOK: Input: default@fullouter_long_small_1a_nonull
+#### A masked pattern was here ####
+-5206670856103795573 NULL NULL
+-5310365297525168078 NULL NULL
+-6187919478609154811 NULL NULL
+-6187919478609154811 NULL NULL
+-6187919478609154811 NULL NULL
+-6187919478609154811 NULL NULL
+-8460550397108077433 NULL NULL
+1569543799237464101 NULL NULL
+3313583664488247651 NULL NULL
+968819023021777205 NULL NULL
+NULL -1339636982994067311 2000-06-20
+NULL -1339636982994067311 2008-12-03
+NULL -2098090254092150988 1817-03-12
+NULL -2098090254092150988 2163-05-26
+NULL -2098090254092150988 2219-12-23
+NULL -2184423060953067642 1853-07-06
+NULL -2184423060953067642 1880-10-06
+NULL -2575185053386712613 1809-07-12
+NULL -2575185053386712613 2105-01-21
+NULL -2688622006344936758 1948-10-15
+NULL -2688622006344936758 2129-01-11
+NULL -327698348664467755 2222-10-15
+NULL -3655445881497026796 2108-08-16
+NULL -4224290881682877258 1813-05-17
+NULL -4224290881682877258 2120-01-16
+NULL -4224290881682877258 2185-07-08
+NULL -4961171400048338491 2196-08-10
+NULL -5706981533666803767 1800-09-20
+NULL -5706981533666803767 2151-06-09
+NULL -5754527700632192146 1958-07-15
+NULL -614848861623872247 2101-05-25
+NULL -614848861623872247 2112-11-09
+NULL -6784441713807772877 1845-02-16
+NULL -6784441713807772877 2054-06-17
+NULL -7707546703881534780 2134-08-20
+NULL 214451696109242839 1855-05-12
+NULL 214451696109242839 1977-01-04
+NULL 214451696109242839 2179-04-18
+NULL 2438535236662373438 1881-09-16
+NULL 2438535236662373438 1916-01-10
+NULL 2438535236662373438 2026-06-23
+NULL 3845554233155411208 1805-11-10
+NULL 3845554233155411208 2264-04-05
+NULL 3873405809071478736 1918-11-20
+NULL 3873405809071478736 2034-06-09
+NULL 3873405809071478736 2164-04-23
+NULL 3905351789241845882 1866-07-28
+NULL 3905351789241845882 2045-12-05
+NULL 434940853096155515 2275-02-08
+NULL 4436884039838843341 2031-05-23
+NULL 5246983111579595707 1817-07-01
+NULL 5246983111579595707 2260-05-11
+NULL 5252407779338300447 2039-03-10
+NULL 5252407779338300447 2042-04-26
+NULL 6049335087268933751 2086-12-17
+NULL 6049335087268933751 2282-06-09
+NULL 7297177530102477725 1921-05-11
+NULL 7297177530102477725 1926-04-12
+NULL 7297177530102477725 2125-08-26
+NULL 7937120928560087303 2083-03-14
+NULL 8755921538765428593 1827-05-01
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: false
+ enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: smallint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: smallint)
+ Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: smallint), s_timestamp (type: timestamp)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: smallint)
+ Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: timestamp)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: smallint)
+ 1 KEY.reducesinkkey0 (type: smallint)
+ outputColumnNames: _col0, _col1, _col2
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint)
+ sort order: +
+ Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: smallint), _col2 (type: timestamp)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_long_big_1b
+PREHOOK: Input: default@fullouter_long_small_1b
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_long_big_1b
+POSTHOOK: Input: default@fullouter_long_small_1b
+#### A masked pattern was here ####
+-25394 NULL NULL
+31713 NULL NULL
+31713 NULL NULL
+31713 NULL NULL
+31713 NULL NULL
+31713 NULL NULL
+31713 NULL NULL
+31713 NULL NULL
+31713 NULL NULL
+31713 NULL NULL
+31713 NULL NULL
+32030 32030 2101-09-09 07:35:05.145
+NULL -14172 1918-09-13 11:44:24.496926711
+NULL -14172 2355-01-14 23:23:34
+NULL -14172 2809-06-07 02:10:58
+NULL -15361 2219-09-15 20:15:03.000169887
+NULL -15361 2434-08-13 20:37:07.000172979
+NULL -15427 2023-11-09 19:31:21
+NULL -15427 2046-06-07 22:58:40.728
+NULL -15427 2355-01-08 12:34:11.617
+NULL -19167 2230-12-22 20:25:39.000242111
+NULL -19167 2319-08-26 11:07:11.268
+NULL -20517 2233-12-20 04:06:56.666522799
+NULL -20517 2774-06-23 12:04:06.5
+NULL -20824 2478-11-05 00:28:05
+NULL -22422 1949-03-13 00:07:53.075
+NULL -22422 2337-07-19 06:33:02.000353352
+NULL -22422 2982-12-28 06:30:26.000883228
+NULL -23117 2037-01-05 21:52:30.685952759
+NULL -24775 2035-03-26 08:11:23.375224153
+NULL -24775 2920-08-06 15:58:28.261059449
+NULL -26998 2268-08-04 12:48:11.848006292
+NULL -26998 2428-12-26 07:53:45.96925825
+NULL -26998 2926-07-18 09:02:46.077
+NULL -29600 2333-11-02 15:06:30
+NULL -30059 2269-05-04 21:23:44.000339209
+NULL -30059 2420-12-10 22:12:30
+NULL -30059 2713-10-13 09:28:49
+NULL -30306 2619-05-24 10:35:58.000774018
+NULL -4279 2214-09-10 03:53:06
+NULL -4279 2470-08-12 11:21:14.000955747
+NULL -7373 2662-10-28 12:07:02.000526564
+NULL -7624 2219-12-03 17:07:19
+NULL -7624 2289-08-28 00:14:34
+NULL -7624 2623-03-20 03:18:45.00006465
+NULL -8087 2550-06-26 23:57:42.588007617
+NULL -8087 2923-07-02 11:40:26.115
+NULL -8435 2642-02-07 11:45:04.353231638
+NULL -8435 2834-12-06 16:38:18.901
+NULL -8624 2120-02-15 15:36:40.000758423
+NULL -8624 2282-03-28 07:58:16
+NULL -8624 2644-05-04 04:45:07.839
+NULL 10553 2168-05-05 21:10:59.000152113
+NULL 11232 2038-04-06 14:53:59
+NULL 11232 2507-01-27 22:04:22.49661421
+NULL 11232 2533-11-26 12:22:18
+NULL 13598 2421-05-20 14:18:31.000264698
+NULL 13598 2909-06-25 23:22:50
+NULL 14865 2079-10-06 16:54:35.117
+NULL 14865 2220-02-28 03:41:36
+NULL 14865 2943-03-21 00:42:10.505
+NULL 17125 2236-07-14 01:54:40.927230276
+NULL 17125 2629-11-15 15:34:52
+NULL 21181 2253-03-12 11:55:48.332
+NULL 21181 2434-02-20 00:46:29.633
+NULL 21436 2526-09-22 23:44:55
+NULL 21436 2696-05-08 05:19:24.112
+NULL 24870 2752-12-26 12:32:23.03685163
+NULL 2632 2561-12-15 15:42:27
+NULL 26484 1919-03-04 07:32:37.519
+NULL 26484 2953-03-10 02:05:26.508953676
+NULL 2748 2298-06-20 21:01:24
+NULL 2748 2759-02-13 18:04:36.000307355
+NULL 2748 2862-04-20 13:12:39.482805897
+NULL 29407 2385-12-14 06:03:39.597
+NULL 3198 2223-04-14 13:20:49
+NULL 3198 2428-06-13 16:21:33.955
+NULL 3198 2736-12-20 03:59:50.343550301
+NULL 4510 2293-01-17 13:47:41.00001006
+NULL 4510 2777-03-24 03:44:28.000169723
+NULL NULL 2124-05-07 15:01:19.021
+NULL NULL 2933-06-20 11:48:09.000839488
+NULL NULL 2971-08-07 12:02:11.000948152
+NULL NULL NULL
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: false
+ enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), b_string (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), s_decimal (type: decimal(38,18))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: decimal(38,18))
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: int)
+ 1 KEY.reducesinkkey0 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18))
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_long_big_1c
+PREHOOK: Input: default@fullouter_long_small_1c
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_long_big_1c
+POSTHOOK: Input: default@fullouter_long_small_1c
+#### A masked pattern was here ####
+-1437463633 JU NULL NULL
+-1437463633 NULL NULL NULL
+-1437463633 SOWDWMS NULL NULL
+-1437463633 TKTKGVGFW NULL NULL
+-1437463633 YYXPPCH NULL NULL
+1725068083 MKSCCE NULL NULL
+1928928239 AMKTIWQ NULL NULL
+1928928239 NULL NULL NULL
+1928928239 NULL NULL NULL
+1928928239 VAQHVRI NULL NULL
+NULL ABBZ NULL NULL
+NULL NULL -1093006502 -69.556658280000000000
+NULL NULL -1197550983 -0.558879692200000000
+NULL NULL -1197550983 0.100000000000000000
+NULL NULL -1197550983 71852.833867441261300000
+NULL NULL -1250662632 -544.554649000000000000
+NULL NULL -1250662632 5454127198.951479000000000000
+NULL NULL -1250662632 93104.000000000000000000
+NULL NULL -1264372462 -6993985240226.000000000000000000
+NULL NULL -1264372462 -899.000000000000000000
+NULL NULL -1264372462 0.883000000000000000
+NULL NULL -1490239076 92253.232096000000000000
+NULL NULL -1681455031 -11105.372477000000000000
+NULL NULL -1681455031 -6.454300000000000000
+NULL NULL -1740848088 -9.157000000000000000
+NULL NULL -1740848088 0.506394259000000000
+NULL NULL -1740848088 901.441000000000000000
+NULL NULL -2048404259 -0.322296044625100000
+NULL NULL -2048404259 3939387044.100000000000000000
+NULL NULL -2123273881 -55.891980000000000000
+NULL NULL -2123273881 3.959000000000000000
+NULL NULL -243940373 -583.258000000000000000
+NULL NULL -243940373 -97176129669.654953000000000000
+NULL NULL -369457052 560.119078830904550000
+NULL NULL -369457052 7.700000000000000000
+NULL NULL -424713789 0.480000000000000000
+NULL NULL -466171792 0.000000000000000000
+NULL NULL -466171792 4227.534400000000000000
+NULL NULL -466171792 69.900000000000000000
+NULL NULL -477147437 6.000000000000000000
+NULL NULL -793950320 -0.100000000000000000
+NULL NULL -793950320 -16.000000000000000000
+NULL NULL -934092157 -7843850349.571300380000000000
+NULL NULL -99948814 -38076694.398100000000000000
+NULL NULL -99948814 -96386.438000000000000000
+NULL NULL 1039864870 0.700000000000000000
+NULL NULL 1039864870 94.040000000000000000
+NULL NULL 1039864870 987601.570000000000000000
+NULL NULL 1091836730 -5017.140000000000000000
+NULL NULL 1091836730 0.020000000000000000
+NULL NULL 1242586043 -4.000000000000000000
+NULL NULL 1242586043 -749975924224.630000000000000000
+NULL NULL 1242586043 71.148500000000000000
+NULL NULL 1479580778 92077343080.700000000000000000
+NULL NULL 150678276 -8278.000000000000000000
+NULL NULL 150678276 15989394.843600000000000000
+NULL NULL 1519948464 152.000000000000000000
+NULL NULL 1561921421 -5.405000000000000000
+NULL NULL 1561921421 53050.550000000000000000
+NULL NULL 1585021913 -5762331.066971120000000000
+NULL NULL 1585021913 607.227470000000000000
+NULL NULL 1585021913 745222.668089540000000000
+NULL NULL 1719049112 -7888197.000000000000000000
+NULL NULL 1738753776 -99817635066320.241600000000000000
+NULL NULL 1738753776 1525.280459649262000000
+NULL NULL 1755897735 -39.965207000000000000
+NULL NULL 1785750809 47443.115000000000000000
+NULL NULL 1801735854 -1760956929364.267000000000000000
+NULL NULL 1801735854 -438541294.700000000000000000
+NULL NULL 1816559437 -1035.700900000000000000
+NULL NULL 1909136587 -8610.078036935181000000
+NULL NULL 1909136587 181.076815359440000000
+NULL NULL 193709887 -0.566300000000000000
+NULL NULL 193709887 -19889.830000000000000000
+NULL NULL 193709887 0.800000000000000000
+NULL NULL 284554389 5.727146000000000000
+NULL NULL 294598722 -3542.600000000000000000
+NULL NULL 294598722 -9377326244.444000000000000000
+NULL NULL 448130683 -4302.485366846491000000
+NULL NULL 452719211 3020.293893074463600000
+NULL NULL 452719211 83003.437220000000000000
+NULL NULL 466567142 -58810.605860000000000000
+NULL NULL 466567142 -9763217822.129028000000000000
+NULL NULL 466567142 196.578529539858400000
+NULL NULL 560745412 678.250000000000000000
+NULL NULL 698032489 -330457.429262583900000000
+NULL NULL 891262439 -0.040000000000000000
+NULL NULL 90660785 -4564.517185000000000000
+NULL NULL 90660785 12590.288613000000000000
+NULL NULL NULL 1.089120893565337000
+NULL NULL NULL 4.261652270000000000
+NULL NULL NULL 682070836.264960300000000000
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: false
+ enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS)
+ Reducer 3 <- Union 2 (SIMPLE_EDGE)
+ Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int)
+ outputColumnNames: _col0
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE
+ FullOuterIntersect: true
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Union 2
+ Vertex: Union 2
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_long_big_1d
+PREHOOK: Input: default@fullouter_long_small_1d
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_long_big_1d
+POSTHOOK: Input: default@fullouter_long_small_1d
+#### A masked pattern was here ####
+-1780951928 NULL
+-2038654700 -2038654700
+-670834064 NULL
+-702028721 NULL
+-702028721 NULL
+-702028721 NULL
+-814597051 NULL
+-814597051 NULL
+-814597051 NULL
+-814597051 NULL
+NULL -1003639073
+NULL -1014271154
+NULL -1036083124
+NULL -1210744742
+NULL -1323620496
+NULL -1379355738
+NULL -1712018127
+NULL -1792852276
+NULL -1912571616
+NULL -497171161
+NULL -683339273
+NULL -707688773
+NULL -747044796
+NULL -894799664
+NULL -932176731
+NULL 103640700
+NULL 1164387380
+NULL 1372592319
+NULL 1431997749
+NULL 1614287784
+NULL 162858059
+NULL 1635405412
+NULL 1685473722
+NULL 1780951928
+NULL 1825107160
+NULL 1831520491
+NULL 1840266070
+NULL 1997943409
+NULL 2119085509
+NULL 246169862
+NULL 260588085
+NULL 41376947
+NULL 436878811
+NULL 533298451
+NULL 670834064
+NULL 699007128
+NULL 699863556
+NULL NULL
+NULL NULL
+NULL NULL
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: false
+ enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: date), _col2 (type: timestamp)
+ Execution mode: llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: string)
+ 1 KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_string_big_1a
+PREHOOK: Input: default@fullouter_string_small_1a
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_string_big_1a
+POSTHOOK: Input: default@fullouter_string_small_1a
+#### A masked pattern was here ####
+FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556
+MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592
+NULL 1985-01-22 2111-01-10 15:44:28
+NULL 2021-02-21 2802-04-21 18:48:18.5933838
+NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13
+NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35
+NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799
+NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558
+NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692
+NULL BEP 2141-02-19 2521-06-09 01:20:07.121
+NULL BEP 2206-08-10 2331-10-09 10:59:51
+NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094
+NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82
+NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647
+NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048
+NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738
+NULL FYW 1807-03-20 2305-08-17 01:32:44
+NULL GOYJHW 1959-04-27 NULL
+NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302
+NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784
+NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251
+NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888
+NULL IWEZJHKE NULL NULL
+NULL KL 1980-09-22 2073-08-25 11:51:10.318
+NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454
+NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055
+NULL LOTLS 2126-09-16 1977-12-15 15:28:56
+NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922
+NULL NULL 1865-11-08 2893-04-07 07:36:12
+NULL NULL 1915-02-22 2554-10-27 09:34:30
+NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801
+NULL NULL NULL NULL
+NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266
+NULL SDA 2196-04-12 2462-10-26 19:28:12.733
+NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287
+NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887
+NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59
+NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498
+NULL ZNOUDCR NULL 1988-04-23 08:40:21
+PXLD NULL NULL NULL
+PXLD NULL NULL NULL
+PXLD NULL NULL NULL
+QNCYBDW NULL NULL NULL
+UA NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: false
+ enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: date), _col2 (type: timestamp)
+ Execution mode: llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: string)
+ 1 KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_string_big_1a_nonull
+PREHOOK: Input: default@fullouter_string_small_1a
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_string_big_1a_nonull
+POSTHOOK: Input: default@fullouter_string_small_1a
+#### A masked pattern was here ####
+FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556
+MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592
+NULL 1985-01-22 2111-01-10 15:44:28
+NULL 2021-02-21 2802-04-21 18:48:18.5933838
+NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13
+NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35
+NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799
+NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558
+NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692
+NULL BEP 2141-02-19 2521-06-09 01:20:07.121
+NULL BEP 2206-08-10 2331-10-09 10:59:51
+NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094
+NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82
+NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647
+NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048
+NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738
+NULL FYW 1807-03-20 2305-08-17 01:32:44
+NULL GOYJHW 1959-04-27 NULL
+NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302
+NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784
+NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251
+NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888
+NULL IWEZJHKE NULL NULL
+NULL KL 1980-09-22 2073-08-25 11:51:10.318
+NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454
+NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055
+NULL LOTLS 2126-09-16 1977-12-15 15:28:56
+NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922
+NULL NULL 1865-11-08 2893-04-07 07:36:12
+NULL NULL 1915-02-22 2554-10-27 09:34:30
+NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801
+NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266
+NULL SDA 2196-04-12 2462-10-26 19:28:12.733
+NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287
+NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887
+NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59
+NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498
+NULL ZNOUDCR NULL 1988-04-23 08:40:21
+PXLD NULL NULL NULL
+PXLD NULL NULL NULL
+PXLD NULL NULL NULL
+QNCYBDW NULL NULL NULL
+UA NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: false
+ enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: date), _col2 (type: timestamp)
+ Execution mode: llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: string)
+ 1 KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_string_big_1a
+PREHOOK: Input: default@fullouter_string_small_1a_nonull
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key
+order by
b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution 
mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 
2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 
KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 
+NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE 
Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 
-2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + 
sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 
-706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 
1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Statistics: Num rows: 17 Data size: 
1729 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + 1 KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 4 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 
22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 
15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 
WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL diff --git ql/src/test/results/clientpositive/llap/join46.q.out ql/src/test/results/clientpositive/llap/join46.q.out index ecb34d1..d4175c0 100644 --- ql/src/test/results/clientpositive/llap/join46.q.out +++ ql/src/test/results/clientpositive/llap/join46.q.out @@ -1631,7 +1631,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1744,7 +1744,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1855,7 +1855,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer 
Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1968,7 +1968,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) @@ -2138,7 +2138,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 diff --git ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out index ef1a6f3..f9c376f 100644 --- ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out +++ ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out @@ -224,6 +224,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false @@ -310,6 +311,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false @@ -397,6 +399,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false @@ -483,6 +486,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/llap/llap_acid.q.out ql/src/test/results/clientpositive/llap/llap_acid.q.out index dbf531c..e32d3bd 100644 --- ql/src/test/results/clientpositive/llap/llap_acid.q.out +++ ql/src/test/results/clientpositive/llap/llap_acid.q.out @@ -115,10 +115,10 @@ STAGE PLANS: projectedOutputColumnNums: [0, 4, 1] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 0] + keyColumns: 4:smallint, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -259,11 +259,11 @@ STAGE PLANS: projectedOutputColumnNums: [5, 2, 3, 4] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:struct native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [6] - valueColumnNums: [2, 3, 4] + partitionColumns: 6:int + valueColumns: 2:float, 3:double, 4:smallint Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -369,10 +369,10 @@ STAGE PLANS: projectedOutputColumnNums: [0, 4, 1] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 0] + keyColumns: 4:smallint, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No 
PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: diff --git ql/src/test/results/clientpositive/llap/llap_acid_fast.q.out ql/src/test/results/clientpositive/llap/llap_acid_fast.q.out index 37c213b..ee48343 100644 --- ql/src/test/results/clientpositive/llap/llap_acid_fast.q.out +++ ql/src/test/results/clientpositive/llap/llap_acid_fast.q.out @@ -109,10 +109,10 @@ STAGE PLANS: projectedOutputColumnNums: [0, 4, 1] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 0] + keyColumns: 4:smallint, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -253,11 +253,11 @@ STAGE PLANS: projectedOutputColumnNums: [5, 2, 3, 4] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:struct native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [6] - valueColumnNums: [2, 3, 4] + partitionColumns: 6:int + valueColumns: 2:float, 3:double, 4:smallint Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -363,10 +363,10 @@ STAGE PLANS: projectedOutputColumnNums: [0, 4, 1] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 0] + keyColumns: 4:smallint, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: diff --git ql/src/test/results/clientpositive/llap/llap_partitioned.q.out ql/src/test/results/clientpositive/llap/llap_partitioned.q.out index 799062e..e80ec79 100644 --- ql/src/test/results/clientpositive/llap/llap_partitioned.q.out +++ ql/src/test/results/clientpositive/llap/llap_partitioned.q.out @@ -1645,13 +1645,15 @@ STAGE PLANS: 0 ctinyint (type: tinyint) 1 ctinyint (type: tinyint) Map Join Vectorization: - bigTableKeyColumnNums: [10] - bigTableRetainedColumnNums: [1, 6, 7, 10] - bigTableValueColumnNums: [1, 6, 7, 10] + bigTableKeyColumns: 10:tinyint + bigTableRetainColumnNums: [1, 6, 7, 10] + bigTableValueColumns: 1:int, 6:char(255), 7:varchar(255), 10:tinyint className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [1, 6, 7, 10] + nonOuterSmallTableKeyMapping: [] + 
projectedOutput: 1:int, 6:char(255), 7:varchar(255), 10:tinyint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1, _col6, _col7, _col10 input vertices: 1 Map 2 @@ -1706,10 +1708,9 @@ STAGE PLANS: Map-reduce partition columns: ctinyint (type: tinyint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [10] + keyColumns: 10:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 10 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint) @@ -2115,10 +2116,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 694 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out index 2c13d5d..86ea9ac 100644 --- ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out +++ ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out @@ -63,6 +63,7 @@ STAGE PLANS: className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Fast Hash Table and No Hybrid Hash Join IS true + hashTableImplementationType: FAST input vertices: 1 Map 3 Statistics: Num rows: 25044 Data size: 200352 Basic stats: COMPLETE Column stats: COMPLETE @@ -242,6 +243,7 @@ STAGE PLANS: className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Fast Hash Table and No Hybrid Hash Join IS true + hashTableImplementationType: FAST input vertices: 1 Map 3 Statistics: Num rows: 25044 Data size: 200352 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/llap/mapjoin2.q.out ql/src/test/results/clientpositive/llap/mapjoin2.q.out index 5d996c9..d454a18 100644 --- ql/src/test/results/clientpositive/llap/mapjoin2.q.out +++ ql/src/test/results/clientpositive/llap/mapjoin2.q.out @@ -46,7 +46,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl #### A masked pattern was here #### true true 2 two -Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[15][bigTable=?] 
in task 'Reducer 3' is a cross product PREHOOK: query: select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl where n = 1) a full outer join (select * from tbl where n = 2) b on a.n = b.n PREHOOK: type: QUERY PREHOOK: Input: default@tbl @@ -87,7 +88,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### 11 1 1 0 0 -Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Reducer 3' is a cross product PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a full outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table diff --git ql/src/test/results/clientpositive/llap/mapjoin46.q.out ql/src/test/results/clientpositive/llap/mapjoin46.q.out index c6c34be..a463bf8 100644 --- ql/src/test/results/clientpositive/llap/mapjoin46.q.out +++ ql/src/test/results/clientpositive/llap/mapjoin46.q.out @@ -128,14 +128,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del +101 2 Car 103 2 Ema 98 NULL None NULL NULL NULL 99 0 Alice NULL NULL NULL 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema -100 1 Bob NULL NULL NULL -101 2 Car 102 2 Del -101 2 Car 103 2 Ema +NULL NULL None NULL NULL NULL PREHOOK: query: EXPLAIN SELECT * FROM test1 LEFT OUTER JOIN test2 @@ -238,12 +238,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del 98 NULL None NULL NULL NULL 99 0 Alice NULL NULL NULL 99 2 Mat NULL NULL NULL -100 1 Bob NULL NULL NULL -101 2 Car 102 2 Del +NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -342,12 +342,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL +100 1 Bob 102 2 Del +101 2 Car 102 2 Del 98 NULL None NULL NULL NULL 99 0 Alice NULL NULL NULL 99 2 Mat NULL NULL NULL -100 1 Bob 102 2 Del -101 2 Car 102 2 Del +NULL NULL None NULL NULL NULL PREHOOK: query: EXPLAIN SELECT * FROM test1 RIGHT OUTER JOIN test2 @@ -436,10 +436,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -99 2 Mat 102 2 Del 101 2 Car 102 2 Del -99 2 Mat 103 2 Ema 101 2 Car 103 2 Ema +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 1' is a cross product @@ -533,18 +533,18 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL -98 NULL None NULL NULL NULL -99 0 Alice NULL NULL NULL -99 2 Mat NULL NULL NULL 100 1 Bob 102 2 Del -100 1 Bob 105 NULL None -100 1 Bob 104 3 Fli 100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None 101 2 Car 102 2 Del -101 2 Car 105 NULL None -101 2 Car 104 3 Fli 101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat NULL NULL NULL +NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -642,19 +642,19 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None 102 2 Del -98 NULL None 102 2 Del -99 0 Alice 102 2 Del -99 2 Mat 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 102 2 Del -100 1 Bob 105 NULL None -100 1 Bob 104 3 Fli 100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None 101 2 Car 102 2 Del -101 2 Car 105 NULL None -101 2 Car 104 3 Fli 101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -748,19 +748,19 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL -98 NULL None NULL NULL NULL -99 0 Alice NULL NULL NULL -99 2 Mat 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 102 2 Del -100 1 Bob 105 NULL None -100 1 Bob 104 3 Fli 100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None 101 2 Car 102 2 Del -101 2 Car 105 NULL None -101 2 Car 104 3 Fli 101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -854,14 +854,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None 102 2 Del +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema 98 NULL None 102 2 Del 99 0 Alice 102 2 Del 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema -100 1 Bob 102 2 Del -101 2 Car 102 2 Del -101 2 Car 103 2 Ema +NULL NULL None 102 2 Del PREHOOK: query: EXPLAIN SELECT * FROM test1 LEFT OUTER JOIN test2 @@ -959,13 +959,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL -98 NULL None NULL NULL NULL -99 0 Alice NULL NULL NULL -99 2 Mat 102 2 Del 100 1 Bob NULL NULL NULL 101 2 Car 102 2 Del 101 2 Car 103 2 Ema +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 2' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -1063,19 +1063,19 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None 102 2 Del -101 2 Car 102 2 Del 100 1 Bob 102 2 Del -99 2 Mat 102 2 Del -99 0 Alice 102 2 Del -98 NULL None 102 2 Del -101 2 Car 103 2 Ema 100 1 Bob 103 2 Ema -99 2 Mat 103 2 Ema -101 2 Car 104 3 Fli 100 1 Bob 104 3 Fli -101 2 Car 105 NULL None 100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 2' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -1169,16 +1169,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -101 2 Car 102 2 Del 100 1 Bob 102 2 Del -99 2 Mat 102 2 Del -101 2 Car 103 2 Ema 100 1 Bob 103 2 Ema -99 2 Mat 103 2 Ema -101 2 Car 104 3 Fli 100 1 Bob 104 3 Fli -101 2 Car 105 NULL None 100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 2' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -1272,16 +1272,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None 102 2 Del -101 2 Car 102 2 Del 100 1 Bob 102 2 Del -99 2 Mat 102 2 Del -99 0 Alice 102 2 Del -98 NULL None 102 2 Del +101 2 Car 102 2 Del 101 2 Car 103 2 Ema +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None +NULL NULL None 102 2 Del PREHOOK: query: EXPLAIN SELECT * FROM test1 RIGHT OUTER JOIN test2 @@ -1379,9 +1379,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -99 2 Mat 102 2 Del 101 2 Car 102 2 Del 101 2 Car 103 2 Ema +99 2 Mat 102 2 Del NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product @@ -1446,7 +1446,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1486,11 +1486,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None 102 2 Del -98 NULL None 102 2 Del -99 0 Alice 102 2 Del -99 2 Mat 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -1499,18 +1494,25 @@ NULL NULL None 102 2 Del 101 2 Car 103 2 Ema 101 2 Car 104 3 Fli 101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test1.key between 100 and 102) + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test1.key between 100 and 102) + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) POSTHOOK: 
type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1559,12 +1561,12 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -1584,7 +1586,8 @@ Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reduce PREHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test1.key between 100 and 102) + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) PREHOOK: type: QUERY PREHOOK: Input: default@test1 PREHOOK: Input: default@test2 @@ -1592,16 +1595,12 @@ PREHOOK: Input: default@test2 POSTHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test1.key between 100 and 102) + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL -98 NULL None NULL NULL NULL -99 0 Alice NULL NULL NULL -99 2 Mat 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -1610,18 +1609,23 @@ NULL NULL None NULL NULL NULL 101 2 Car 103 2 Ema 101 2 Car 104 3 Fli 101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test2.key between 100 and 102) + OR test1.key between 100 and 102) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test2.key between 100 and 102) + OR test1.key between 100 and 102) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1670,12 +1674,12 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -1695,7 +1699,7 @@ Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reduce PREHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test2.key between 100 and 102) + OR test1.key between 100 and 102) PREHOOK: type: QUERY PREHOOK: Input: default@test1 PREHOOK: Input: default@test2 @@ -1703,34 +1707,36 @@ PREHOOK: Input: default@test2 POSTHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test2.key between 100 and 102) + OR test1.key between 100 and 102) POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None 102 2 Del -98 NULL None 102 2 Del -99 0 Alice 102 2 Del -99 2 Mat 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 
102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None 101 2 Car 102 2 Del 101 2 Car 103 2 Ema -NULL NULL NULL 104 3 Fli -NULL NULL NULL 105 NULL None +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) + OR test1.key between 100 and 102) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) + OR test1.key between 100 and 102) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1741,7 +1747,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1754,11 +1760,9 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + sort order: Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col2 (type: string) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -1771,11 +1775,9 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + sort order: Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col2 (type: string) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1783,16 +1785,16 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col1 (type: int) + 0 + 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 10 Data size: 1711 Basic stats: COMPLETE Column stats: COMPLETE + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 1711 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1804,11 +1806,11 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON 
(test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) + OR test1.key between 100 and 102) PREHOOK: type: QUERY PREHOOK: Input: default@test1 PREHOOK: Input: default@test2 @@ -1816,29 +1818,474 @@ PREHOOK: Input: default@test2 POSTHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) + OR test1.key between 100 and 102) POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL NULL 105 NULL None -NULL NULL None NULL NULL NULL +100 1 Bob 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None 98 NULL None NULL NULL NULL 99 0 Alice NULL NULL NULL -100 1 Bob NULL NULL NULL 99 2 Mat 102 2 Del -101 2 Car 102 2 Del -101 2 Car 103 2 Ema -NULL NULL NULL 104 3 Fli -Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +99 2 Mat 103 2 Ema +NULL NULL None NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN SELECT * -FROM ( - SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, - test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 - FROM test1 RIGHT OUTER JOIN test2 - ON (test1.value=test2.value +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col3 
BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None 102 2 Del +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 
AND 102)} + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None 102 2 Del +PREHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 10 Data size: 1711 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 1711 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None NULL NULL NULL +PREHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join 
Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 10 Data size: 1711 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 1711 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value AND (test1.key between 100 and 102 OR test2.key between 100 and 102)) ) sq1 @@ -1966,7 +2413,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -2032,23 +2479,244 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -99 2 Mat 102 2 Del 99 0 Alice NULL NULL NULL -99 2 Mat 102 2 Del 100 1 Bob NULL NULL NULL -101 2 Car 102 2 Del 99 0 Alice NULL NULL NULL 101 2 Car 102 2 Del 100 1 Bob NULL NULL NULL -101 2 Car 103 2 Ema 99 0 Alice NULL NULL NULL +101 2 Car 102 2 Del 99 0 Alice NULL NULL NULL 101 2 Car 103 2 Ema 100 1 Bob NULL NULL NULL -NULL NULL NULL 104 3 Fli NULL NULL None NULL NULL NULL -NULL NULL NULL 104 3 Fli 98 NULL None NULL NULL NULL -NULL NULL NULL 104 3 Fli 99 0 Alice NULL NULL NULL -NULL NULL NULL 104 3 Fli 99 2 Mat 102 2 Del +101 2 Car 103 2 Ema 99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del 100 1 Bob NULL NULL NULL +99 2 Mat 102 2 Del 99 0 Alice NULL NULL NULL NULL NULL NULL 104 3 Fli 100 1 Bob NULL NULL NULL NULL NULL NULL 104 3 Fli 101 2 Car 102 2 Del NULL NULL NULL 104 3 Fli 101 2 Car 103 2 Ema -NULL NULL NULL 105 NULL None NULL NULL None NULL NULL NULL +NULL NULL NULL 104 3 Fli 98 NULL None NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 0 Alice NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli NULL NULL None NULL NULL NULL +NULL NULL NULL 105 NULL None 100 1 Bob NULL NULL NULL +NULL NULL NULL 105 NULL None 101 2 Car 102 2 Del +NULL NULL NULL 105 NULL None 101 2 Car 103 2 Ema NULL NULL NULL 105 NULL None 98 
NULL None NULL NULL NULL NULL NULL NULL 105 NULL None 99 0 Alice NULL NULL NULL NULL NULL NULL 105 NULL None 99 2 Mat 102 2 Del +NULL NULL NULL 105 NULL None NULL NULL None NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 4 (BROADCAST_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 1 Map 4 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 380 Basic 
stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 0 Map 1 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + residual filter predicates: {(_col1 is null or (_col10 is null and (_col7 <> _col4)))} + Statistics: Num rows: 64 Data size: 24440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 64 Data size: 24440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS 
value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +101 2 Car 102 2 Del 100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del 99 0 Alice NULL NULL NULL +101 2 Car 103 2 Ema 100 1 Bob NULL NULL NULL +101 2 Car 103 2 Ema 99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del 100 1 Bob NULL NULL NULL +99 2 Mat 102 2 Del 99 0 Alice NULL NULL NULL +NULL NULL NULL 104 3 Fli 100 1 Bob NULL NULL NULL +NULL NULL NULL 104 3 Fli 101 2 Car 102 2 Del +NULL NULL NULL 104 3 Fli 101 2 Car 103 2 Ema +NULL NULL NULL 104 3 Fli 98 NULL None NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 0 Alice NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli NULL NULL None NULL NULL NULL NULL NULL NULL 105 NULL None 100 1 Bob NULL NULL NULL NULL NULL NULL 105 NULL None 101 2 Car 102 2 Del NULL NULL NULL 105 NULL None 101 2 Car 103 2 Ema +NULL NULL NULL 105 NULL None 98 NULL None NULL NULL NULL +NULL NULL NULL 105 NULL None 99 0 Alice NULL NULL NULL +NULL NULL NULL 105 NULL None 99 2 Mat 102 2 Del +NULL NULL NULL 105 NULL None NULL NULL None NULL NULL NULL diff --git ql/src/test/results/clientpositive/llap/mergejoin.q.out ql/src/test/results/clientpositive/llap/mergejoin.q.out index 96be039..ddabfcd 100644 --- ql/src/test/results/clientpositive/llap/mergejoin.q.out +++ ql/src/test/results/clientpositive/llap/mergejoin.q.out @@ -52,10 +52,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:string Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -106,10 +106,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:string Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Select Operator @@ -136,10 +136,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] 
IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:string, 1:string, 2:binary Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap @@ -178,6 +177,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -210,10 +212,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:string, 1:string, 2:binary Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) @@ -435,10 +436,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -488,10 +488,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -530,6 +529,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1610,10 +1612,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 242 Data size: 968 
Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1655,10 +1656,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1697,6 +1697,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1806,10 +1809,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1851,10 +1853,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1893,6 +1894,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2002,10 +2006,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2047,10 +2050,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2075,7 +2077,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) @@ -2089,6 +2091,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2209,10 +2214,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:int Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: vectorized, llap @@ -2263,10 +2268,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) @@ -2292,10 +2296,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:string, 1:string, 2:binary Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap @@ -2346,10 +2349,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2398,6 +2400,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 12 Basic 
stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -2417,6 +2422,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -2487,10 +2495,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:int, 1:int, 2:binary Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Reducer 7 @@ -2525,10 +2532,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:string, 1:string, 2:binary Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) @@ -2608,10 +2614,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2661,10 +2666,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2703,6 +2707,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2824,10 +2831,9 @@ STAGE 
PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2877,10 +2883,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2930,10 +2935,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 624 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2983,10 +2987,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -3022,6 +3025,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 624 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -3041,6 +3047,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 5 Execution mode: vectorized, llap Reduce Vectorization: @@ -3141,10 +3150,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe 
for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -3194,10 +3202,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -3236,6 +3243,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -3354,10 +3364,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:int Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: vectorized, llap @@ -3408,10 +3418,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) @@ -3437,10 +3446,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:string, 1:string, 2:binary Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap @@ -3491,10 +3499,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -3543,6 +3550,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -3562,6 +3572,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -3632,10 +3645,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:int, 1:int, 2:binary Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Reducer 7 @@ -3670,10 +3682,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:string, 1:string, 2:binary Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) @@ -3761,10 +3772,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -3814,10 +3824,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -3867,10 +3876,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 
(type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 624 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -3920,10 +3928,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -3959,6 +3966,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 624 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -3978,6 +3988,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 5 Execution mode: vectorized, llap Reduce Vectorization: @@ -4091,10 +4104,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -4143,10 +4155,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -4196,10 +4207,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] 
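
The recurring condition-map change in these hunks, "Outer Join m to n" becoming "Full Outer Join m to n", is purely a relabelling of how the existing join type is printed; the plans are otherwise identical. A condition map like "Left Outer Join 0 to 1" followed by "Full Outer Join 1 to 2" comes from a three-way join of this shape (placeholder tables t1/t2/t3, in the spirit of the smb_mapjoin tests):

    explain
    select a.key, b.key, c.key
    from t1 a
    left outer join t2 b on a.key = b.key
    full outer join t3 c on b.key = c.key;
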
Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap @@ -4220,6 +4230,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -4288,10 +4301,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/partialdhj.q.out ql/src/test/results/clientpositive/llap/partialdhj.q.out index 4e62c4f..460c02d 100644 --- ql/src/test/results/clientpositive/llap/partialdhj.q.out +++ ql/src/test/results/clientpositive/llap/partialdhj.q.out @@ -107,6 +107,7 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: 32 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Group By Operator keys: _col0 (type: string) @@ -144,6 +145,7 @@ STAGE PLANS: input vertices: 0 Reducer 3 Statistics: Num rows: 25 Data size: 6675 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false @@ -337,6 +339,7 @@ STAGE PLANS: input vertices: 1 Reducer 5 Statistics: Num rows: 25 Data size: 6675 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false @@ -358,6 +361,7 @@ STAGE PLANS: input vertices: 1 Map 6 Statistics: Num rows: 32 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Group By Operator keys: _col0 (type: string) diff --git ql/src/test/results/clientpositive/llap/semijoin.q.out ql/src/test/results/clientpositive/llap/semijoin.q.out index 82cee33..9711ab2 100644 --- ql/src/test/results/clientpositive/llap/semijoin.q.out +++ ql/src/test/results/clientpositive/llap/semijoin.q.out @@ -1968,7 +1968,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Semi Join 1 to 2 keys: 0 key (type: int) @@ -2394,7 +2394,7 @@ STAGE PLANS: Merge Join Operator condition map: Left Semi Join 0 to 1 - Outer Join 0 to 2 + Full Outer Join 0 to 2 keys: 0 key (type: int) 1 _col0 (type: int) diff --git ql/src/test/results/clientpositive/llap/smb_mapjoin_4.q.out ql/src/test/results/clientpositive/llap/smb_mapjoin_4.q.out index 89bdfd8..66bf118 100644 --- ql/src/test/results/clientpositive/llap/smb_mapjoin_4.q.out +++ ql/src/test/results/clientpositive/llap/smb_mapjoin_4.q.out @@ -582,7 +582,7 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) @@ -1046,7 +1046,7 @@ STAGE PLANS: Merge Join Operator condition map: Right Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 1 to 2 keys: 
0 _col0 (type: int) 1 _col0 (type: int) @@ -1280,7 +1280,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 1 to 2 keys: 0 _col0 (type: int) @@ -1397,7 +1397,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Right Outer Join 1 to 2 keys: 0 _col0 (type: int) @@ -1511,8 +1511,8 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 0 to 1 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) diff --git ql/src/test/results/clientpositive/llap/smb_mapjoin_5.q.out ql/src/test/results/clientpositive/llap/smb_mapjoin_5.q.out index 06e4173..ec40621 100644 --- ql/src/test/results/clientpositive/llap/smb_mapjoin_5.q.out +++ ql/src/test/results/clientpositive/llap/smb_mapjoin_5.q.out @@ -582,7 +582,7 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) @@ -1046,7 +1046,7 @@ STAGE PLANS: Merge Join Operator condition map: Right Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) @@ -1280,7 +1280,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 1 to 2 keys: 0 _col0 (type: int) @@ -1397,7 +1397,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Right Outer Join 1 to 2 keys: 0 _col0 (type: int) @@ -1511,8 +1511,8 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 0 to 1 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) diff --git ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out index b63b25f..7492f64 100644 --- ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out +++ ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out @@ -491,6 +491,7 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Reduce Output Operator key expressions: _col2 (type: int) @@ -623,6 +624,7 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Group By Operator aggregations: count() @@ -756,6 +758,7 @@ STAGE PLANS: input vertices: 1 Map 5 Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Group By Operator aggregations: count() diff --git ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out index 4f557d3..2abe505 100644 --- ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out +++ ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out @@ -204,3 +204,99 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: explain +select a.* +from alltypesorc a left outer join src b +on a.cint = cast(b.key as int) +limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.* +from alltypesorc a 
left outer join src b +on a.cint = cast(b.key as int) +limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: UDFToInteger(_col0) (type: int) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: int), VALUE._col3 (type: bigint), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: string), VALUE._col7 (type: string), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 UDFToInteger(_col0) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + input vertices: + 1 Map 3 + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 310 
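
Two separate changes surface in this stretch. First, map joins produced by the dynamic-partition hash-join conversion now print "DynamicPartitionHashJoin: true" alongside "HybridGraceHashJoin: true"; that conversion is governed by an existing flag (shown here only for context):

    set hive.execution.engine=tez;
    set hive.optimize.dynamic.partition.hashjoin=true;

Second, tez_dynpart_hashjoin_3.q.out gains a new golden plan for a LEFT OUTER JOIN under LIMIT 1. Because a left outer join preserves every row of the left table, the optimizer can apply the Limit to the alltypesorc scan before the broadcast join, which is why Limit operators appear both below and above the Map Join Operator in the surrounding plan.
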
Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_1.q.out ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_1.q.out index d0a5e62..d853612 100644 --- ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_1.q.out +++ ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_1.q.out @@ -491,6 +491,7 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Reduce Output Operator key expressions: _col2 (type: int) @@ -623,6 +624,7 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Group By Operator aggregations: count() @@ -756,6 +758,7 @@ STAGE PLANS: input vertices: 1 Map 5 Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Group By Operator aggregations: count() diff --git ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out index 5d4bfe7..0fec415 100644 --- ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out +++ ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out @@ -155,10 +155,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:decimal(38,18), 1:decimal(38,18), 2:decimal(38,18), 3:struct Statistics: Num rows: 1 Data size: 736 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), _col3 (type: struct) Execution mode: vectorized, llap @@ -286,10 +285,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:double, 1:double, 2:double, 3:struct Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: struct) Execution mode: vectorized, llap @@ -417,10 +415,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe 
for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:timestamp, 1:timestamp, 2:double, 3:struct Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: double), _col3 (type: struct) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out index c99ac8d..0c89015 100644 --- ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out +++ ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out @@ -94,10 +94,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:int, 1:string Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out index 54216fa..43db2ed 100644 --- ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out @@ -104,6 +104,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -236,6 +239,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -442,6 +448,9 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Map 5 Map Operator Tree: TableScan @@ -486,6 +495,9 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -541,6 +553,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 6 Execution mode: vectorized, llap Reduce Vectorization: @@ -695,6 +710,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -832,6 +850,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -993,6 +1014,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -1142,6 +1166,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -1335,6 +1362,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1458,6 +1488,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -1600,6 +1633,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -1745,6 +1781,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -1917,6 +1956,9 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-3 Dependency Collection @@ -2140,6 +2182,9 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_between_columns.q.out ql/src/test/results/clientpositive/llap/vector_between_columns.q.out index c85c59e..d4de843 100644 --- 
ql/src/test/results/clientpositive/llap/vector_between_columns.q.out +++ ql/src/test/results/clientpositive/llap/vector_between_columns.q.out @@ -196,6 +196,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -368,6 +371,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out index aabfc73..1ab9448 100644 --- ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out +++ ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out @@ -155,6 +155,7 @@ STAGE PLANS: className: VectorMapJoinInnerStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 input vertices: 1 Map 4 @@ -567,6 +568,7 @@ STAGE PLANS: className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col3 input vertices: 1 Map 2 diff --git ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out index 7dde2ec..128fe4f 100644 --- ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out @@ -575,10 +575,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [22, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 38, 40, 43, 44] + valueColumns: 22:string, 24:string, 25:string, 26:date, 27:double, 28:double, 30:decimal(10,2), 31:decimal(10,2), 32:decimal(12,2), 33:decimal(12,2), 34:decimal(10,2), 35:decimal(10,2), 38:timestamp, 40:int, 43:int, 44:date Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), 
_col3 (type: string), _col4 (type: date), _col5 (type: double), _col6 (type: double), _col7 (type: decimal(10,2)), _col8 (type: decimal(10,2)), _col9 (type: decimal(12,2)), _col10 (type: decimal(12,2)), _col11 (type: decimal(10,2)), _col12 (type: decimal(10,2)), _col13 (type: timestamp), _col14 (type: int), _col15 (type: int), _col16 (type: date) Execution mode: vectorized, llap @@ -950,10 +950,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [27, 38, 48, 52, 54, 60, 63, 65, 67, 68, 69, 70, 73, 76, 79, 80] + valueColumns: 27:string, 38:string, 48:string, 52:date, 54:double, 60:double, 63:decimal(10,2), 65:decimal(10,2), 67:decimal(12,2), 68:decimal(12,2), 69:decimal(10,2), 70:decimal(10,2), 73:timestamp, 76:int, 79:int, 80:date Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: date), _col5 (type: double), _col6 (type: double), _col7 (type: decimal(10,2)), _col8 (type: decimal(10,2)), _col9 (type: decimal(12,2)), _col10 (type: decimal(12,2)), _col11 (type: decimal(10,2)), _col12 (type: decimal(10,2)), _col13 (type: timestamp), _col14 (type: int), _col15 (type: int), _col16 (type: date) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out index d8ab3c5..4cdf6ea 100644 --- ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out @@ -415,10 +415,10 @@ STAGE PLANS: sort order: +++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 2, 3] + keyColumns: 1:timestamp, 2:string, 3:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [10, 12, 13, 14, 11, 7, 16, 23] + valueColumns: 10:string, 12:string, 13:string, 14:int, 11:string, 7:int, 16:int, 23:date Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date) Execution mode: vectorized, llap @@ -700,10 +700,10 @@ STAGE PLANS: sort order: +++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 2, 3] + keyColumns: 1:timestamp, 2:string, 3:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [15, 26, 36, 40, 42, 44, 46, 53] + valueColumns: 15:string, 26:string, 36:string, 40:int, 42:string, 44:int, 46:int, 53:date Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: 
NONE value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out index 72cd1d3..a1e61f7 100644 --- ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out +++ ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out @@ -177,6 +177,7 @@ STAGE PLANS: className: VectorMapJoinInnerStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 3 @@ -387,6 +388,7 @@ STAGE PLANS: className: VectorMapJoinInnerStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 @@ -557,6 +559,7 @@ STAGE PLANS: className: VectorMapJoinInnerStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 diff --git ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out index 5279e77..c447748 100644 --- ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out +++ ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out @@ -99,14 +99,15 @@ STAGE PLANS: 0 _col0 (type: bigint) 1 _col0 (type: bigint) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:bigint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 2] - smallTableMapping: [2] + projectedOutput: 0:bigint, 2:bigint + smallTableValueMapping: 2:bigint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col2 input vertices: 1 Map 2 @@ -169,10 +170,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out index 409c68c..194929d 100644 --- ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out +++ ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out @@ -70,10 +70,10 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:int Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_complex_all.q.out ql/src/test/results/clientpositive/llap/vector_complex_all.q.out index f2277c1..dc55271 100644 --- ql/src/test/results/clientpositive/llap/vector_complex_all.q.out +++ ql/src/test/results/clientpositive/llap/vector_complex_all.q.out @@ -690,10 +690,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:string, 1:map, 2:array, 3:struct Statistics: Num rows: 1 Data size: 3440 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: map), _col2 (type: array), _col3 (type: struct) Execution mode: vectorized, llap @@ -730,10 +729,8 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap Map Vectorization: @@ -769,10 +766,8 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap Map Vectorization: @@ -810,10 +805,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: 
VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: vectorized, llap @@ -859,6 +853,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_create_complex + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-2 Dependency Collection @@ -949,10 +946,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -1174,10 +1170,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 13503 Data size: 4721072 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap @@ -1317,10 +1313,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 13503 Data size: 7697400 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_complex_join.q.out ql/src/test/results/clientpositive/llap/vector_complex_join.q.out index 98e7dc0..a7d0c32 100644 --- ql/src/test/results/clientpositive/llap/vector_complex_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_complex_join.q.out @@ -72,6 +72,7 @@ STAGE PLANS: className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: 
OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 input vertices: 1 Map 2 @@ -251,6 +252,7 @@ STAGE PLANS: className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 @@ -394,6 +396,7 @@ STAGE PLANS: className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 diff --git ql/src/test/results/clientpositive/llap/vector_date_1.q.out ql/src/test/results/clientpositive/llap/vector_date_1.q.out index 2a77c39..38a4c79 100644 --- ql/src/test/results/clientpositive/llap/vector_date_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_date_1.q.out @@ -128,10 +128,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:date native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 3, 4, 5, 6, 7, 8, 9, 10] + valueColumns: 1:date, 3:boolean, 4:boolean, 5:boolean, 6:boolean, 7:boolean, 8:boolean, 9:boolean, 10:boolean Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean) Execution mode: vectorized, llap @@ -293,10 +293,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:date native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 3, 4, 5, 6, 7, 8, 9, 10] + valueColumns: 1:date, 3:boolean, 4:boolean, 5:boolean, 6:boolean, 7:boolean, 8:boolean, 9:boolean, 10:boolean Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean) Execution mode: vectorized, llap @@ -458,10 +458,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:date native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5, 6, 7] + valueColumns: 3:boolean, 4:boolean, 5:boolean, 6:boolean, 7:boolean Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean) Execution mode: vectorized, llap @@ -623,10 +623,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:date native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5, 6, 7] + valueColumns: 3:boolean, 4:boolean, 5:boolean, 6:boolean, 7:boolean Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean) Execution mode: vectorized, llap @@ -792,10 +792,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:date native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:date Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out index 5107015..96fba4b 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out @@ -87,10 +87,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -204,10 +203,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -321,10 +319,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + 
keyColumns: 4:smallint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -438,10 +435,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -555,10 +551,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -672,10 +667,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -789,10 +783,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -906,10 +899,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1023,10 +1015,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 
4:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs diff --git ql/src/test/results/clientpositive/llap/vector_decimal_10_0.q.out ql/src/test/results/clientpositive/llap/vector_decimal_10_0.q.out index 5e835cd..a97329e 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_10_0.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_10_0.q.out @@ -76,10 +76,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:decimal(10,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -192,10 +191,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:decimal(10,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs diff --git ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out index bc596b3..45b3dc3 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out @@ -65,10 +65,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -181,10 +180,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -297,10 +295,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + 
keyColumns: 2:smallint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -413,10 +410,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -529,10 +525,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -645,10 +640,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -761,10 +755,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -877,10 +870,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1004,10 +996,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:boolean 
native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1120,10 +1111,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1236,10 +1226,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:smallint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1352,10 +1341,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1468,10 +1456,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1584,10 +1571,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1700,10 +1686,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:double native: true 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1816,10 +1801,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs diff --git ql/src/test/results/clientpositive/llap/vector_decimal_6.q.out ql/src/test/results/clientpositive/llap/vector_decimal_6.q.out index 800a4ae..92874b3 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_6.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_6.q.out @@ -149,10 +149,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:decimal(10,5), 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 27 Data size: 2684 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -290,10 +289,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:decimal(17,4), 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -442,10 +440,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:decimal(18,5) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 54 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -486,10 +483,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:decimal(18,5) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS 
true - valueColumnNums: [] Statistics: Num rows: 54 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -670,10 +666,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:decimal(11,5) Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(11,5)) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out index 4bb8a01..894fcf8 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out @@ -100,10 +100,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9] + valueColumns: 1:bigint, 2:decimal(20,10), 3:decimal(20,10), 4:decimal(30,10), 5:bigint, 6:decimal(23,14), 7:decimal(23,14), 8:decimal(33,14), 9:bigint Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)), _col9 (type: bigint) Execution mode: vectorized, llap @@ -279,10 +279,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + valueColumns: 1:bigint, 2:decimal(20,10), 3:decimal(20,10), 4:decimal(30,10), 5:struct, 6:struct, 7:struct, 8:bigint, 9:decimal(23,14), 10:decimal(23,14), 11:decimal(33,14), 12:struct, 13:struct, 14:struct, 15:bigint Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) Execution mode: vectorized, llap @@ -465,10 +465,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce 
Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9] + valueColumns: 1:bigint, 2:decimal(11,5), 3:decimal(11,5), 4:decimal(21,5), 5:bigint, 6:decimal(16,0), 7:decimal(16,0), 8:decimal(26,0), 9:bigint Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: bigint), _col6 (type: decimal(16,0)), _col7 (type: decimal(16,0)), _col8 (type: decimal(26,0)), _col9 (type: bigint) Execution mode: vectorized, llap @@ -664,10 +664,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + valueColumns: 1:bigint, 2:decimal(11,5), 3:decimal(11,5), 4:decimal(21,5), 5:struct, 6:struct, 7:struct, 8:bigint, 9:decimal(16,0), 10:decimal(16,0), 11:decimal(26,0), 12:struct, 13:struct, 14:struct, 15:bigint Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(16,0)), _col10 (type: decimal(16,0)), _col11 (type: decimal(26,0)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out index 64433ea..d5a9173 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out @@ -83,10 +83,9 @@ STAGE PLANS: sort order: ++++++++++++++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 6, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] + keyColumns: 4:decimal(25,14), 6:decimal(26,14), 8:decimal(38,13), 10:decimal(38,17), 11:decimal(12,10), 12:int, 13:smallint, 14:tinyint, 15:bigint, 16:boolean, 17:double, 18:float, 19:string, 20:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 455 Data size: 100294 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap @@ -256,10 +255,9 @@ STAGE PLANS: sort order: ++++++++++++++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 6, 8, 10, 11, 
12, 13, 14, 15, 16, 17, 18, 19, 20] + keyColumns: 4:decimal(11,3), 6:decimal(11,3), 8:decimal(21,11), 10:decimal(23,9), 11:decimal(5,3), 12:int, 13:smallint, 14:tinyint, 15:bigint, 16:boolean, 17:double, 18:float, 19:string, 20:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 455 Data size: 100294 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out index 87fc687..28b1740 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out @@ -193,10 +193,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(26,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(26,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -457,10 +456,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(26,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(26,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(24,0) Statistics: Num rows: 997 Data size: 212470 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(24,0)) Execution mode: vectorized, llap @@ -799,10 +798,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(16,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(16,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1065,10 +1063,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(16,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(16,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(14,0) Statistics: Num rows: 997 Data size: 
212470 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(14,0)) Execution mode: vectorized, llap @@ -1332,10 +1330,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(16,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(16,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1598,10 +1595,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(16,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(16,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(14,0) Statistics: Num rows: 997 Data size: 212470 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(14,0)) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out index 18b903b..88a0444 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out @@ -601,10 +601,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:struct, 1:decimal(30,10) Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: decimal(30,10)) Execution mode: vectorized, llap @@ -1211,10 +1210,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:struct, 1:decimal(30,10) Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: decimal(30,10)) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out index 6737052..7bdf8c8 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out @@ -74,10 +74,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: 
VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:decimal(10,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:decimal(11,0) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(11,0)) Execution mode: vectorized, llap @@ -192,10 +192,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:decimal(11,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(10,0) Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(10,0)) Execution mode: vectorized, llap @@ -338,10 +338,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:decimal(10,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:decimal(11,0) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(11,0)) Execution mode: vectorized, llap @@ -455,10 +455,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:decimal(11,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(10,0) Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(10,0)) Execution mode: vectorized, llap @@ -600,10 +600,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:decimal(10,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:decimal(11,0) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(11,0)) Execution mode: vectorized, llap @@ -717,10 +717,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:decimal(11,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT 
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(10,0) Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(10,0)) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out index e3d4f40..3ac4166 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out @@ -78,10 +78,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:decimal(21,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] + valueColumns: 3:decimal(21,0), 4:decimal(22,1), 5:decimal(23,2), 6:decimal(24,3), 7:decimal(21,0), 8:decimal(21,0), 9:decimal(21,0), 10:decimal(21,0), 11:decimal(21,0), 12:decimal(21,0), 13:decimal(21,0), 14:decimal(21,0) Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(21,0)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(21,0)) Execution mode: vectorized, llap @@ -246,10 +246,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:decimal(21,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] + valueColumns: 4:decimal(21,0), 5:decimal(22,1), 6:decimal(23,2), 7:decimal(24,3), 8:decimal(25,4), 9:decimal(21,0), 10:decimal(21,0), 11:decimal(21,0), 12:decimal(21,0), 13:decimal(21,0), 14:decimal(21,0), 15:decimal(22,1), 16:decimal(23,2), 17:decimal(24,3), 18:decimal(25,4), 19:decimal(21,0), 20:decimal(21,0), 21:decimal(21,0), 22:decimal(21,0) Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(25,4)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(22,1)), _col13 (type: decimal(23,2)), _col14 (type: decimal(24,3)), _col15 (type: decimal(25,4)), _col16 (type: decimal(21,0)), _col17 (type: decimal(21,0)), _col18 (type: decimal(21,0)), _col19 (type: decimal(21,0)) Execution mode: vectorized, llap @@ -441,10 +441,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - 
keyColumnNums: [2] + keyColumns: 2:decimal(21,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34] + valueColumns: 3:decimal(21,0), 4:decimal(21,0), 5:decimal(21,0), 6:decimal(21,0), 7:decimal(21,0), 8:decimal(21,0), 9:decimal(21,0), 10:decimal(21,0), 11:decimal(21,0), 12:decimal(21,0), 13:decimal(21,0), 14:decimal(21,0), 15:decimal(21,0), 16:decimal(21,0), 17:decimal(21,0), 18:decimal(21,0), 19:decimal(22,1), 20:decimal(23,2), 21:decimal(24,3), 22:decimal(25,4), 23:decimal(26,5), 24:decimal(27,6), 25:decimal(28,7), 26:decimal(29,8), 27:decimal(30,9), 28:decimal(31,10), 29:decimal(32,11), 30:decimal(33,12), 31:decimal(34,13), 32:decimal(35,14), 33:decimal(36,15), 34:decimal(37,16) Statistics: Num rows: 1 Data size: 3808 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(21,0)), _col3 (type: decimal(21,0)), _col4 (type: decimal(21,0)), _col5 (type: decimal(21,0)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(21,0)), _col13 (type: decimal(21,0)), _col14 (type: decimal(21,0)), _col15 (type: decimal(21,0)), _col16 (type: decimal(21,0)), _col17 (type: decimal(22,1)), _col18 (type: decimal(23,2)), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,4)), _col21 (type: decimal(26,5)), _col22 (type: decimal(27,6)), _col23 (type: decimal(28,7)), _col24 (type: decimal(29,8)), _col25 (type: decimal(30,9)), _col26 (type: decimal(31,10)), _col27 (type: decimal(32,11)), _col28 (type: decimal(33,12)), _col29 (type: decimal(34,13)), _col30 (type: decimal(35,14)), _col31 (type: decimal(36,15)), _col32 (type: decimal(37,16)) Execution mode: vectorized, llap @@ -625,10 +625,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:decimal(30,9) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [4] + valueColumns: 4:decimal(30,9) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(30,9)) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_trailing.q.out ql/src/test/results/clientpositive/llap/vector_decimal_trailing.q.out index b33f090..112def0 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_trailing.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_trailing.q.out @@ -108,10 +108,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS
true - valueColumnNums: [1, 2] + valueColumns: 1:decimal(10,4), 2:decimal(15,8) Statistics: Num rows: 30 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(10,4)), _col2 (type: decimal(15,8)) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out index 59b3c4a..7215f14 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out @@ -2316,10 +2316,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:decimal(30,10), 2:bigint, 3:struct Statistics: Num rows: 18 Data size: 7416 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(30,10)), _col2 (type: bigint), _col3 (type: struct) Execution mode: vectorized, llap @@ -2383,10 +2383,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5, 3, 1] + valueColumns: 5:decimal(38,18), 3:decimal(24,14), 1:decimal(30,10) Statistics: Num rows: 17 Data size: 5780 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(38,18)), _col2 (type: decimal(24,14)), _col3 (type: decimal(30,10)) Reducer 3 @@ -3256,10 +3256,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:struct, 2:struct Statistics: Num rows: 18 Data size: 2952 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: struct), _col2 (type: struct) Execution mode: vectorized, llap @@ -3391,10 +3391,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:struct, 2:struct Statistics: Num rows: 18 Data size: 2952 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: struct), _col2 (type: struct) Execution mode: vectorized, llap @@ -3606,10 +3606,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: 
className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(20,10) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(20,10)) Execution mode: vectorized, llap @@ -3737,10 +3736,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(20,10) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(20,10)) Execution mode: vectorized, llap @@ -3868,10 +3866,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -6254,10 +6251,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:decimal(25,3), 2:bigint, 3:struct Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(25,3)), _col2 (type: bigint), _col3 (type: struct) Execution mode: vectorized, llap @@ -6322,10 +6319,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5, 3, 1] + valueColumns: 5:decimal(38,16), 3:decimal(19,7), 1:decimal(25,3) Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(38,16)), _col2 (type: decimal(19,7)), _col3 (type: decimal(25,3)) Reducer 3 @@ -7200,10 +7197,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:struct, 2:struct Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct), _col2 (type: struct) Execution mode: vectorized, llap @@ -7336,10 +7333,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:struct, 2:struct Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct), _col2 (type: struct) Execution mode: vectorized, llap @@ -7552,10 +7549,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(15,3) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(15,3)) Execution mode: vectorized, llap @@ -7684,10 +7680,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(15,3) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(15,3)) Execution mode: vectorized, llap @@ -7816,10 +7811,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out new file mode 100644 index 0000000..7054cee --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out @@ -0,0 +1,1578 @@ +PREHOOK: query: drop table if exists TJOIN1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists TJOIN2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: 
create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1 +POSTHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1 +PREHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2 +POSTHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2 +PREHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1STAGE +POSTHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1STAGE +PREHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2STAGE +POSTHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2STAGE +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin1stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin1stage +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin2stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin2stage +PREHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1stage +PREHOOK: Output: default@tjoin1 +POSTHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1stage +POSTHOOK: Output: default@tjoin1 +POSTHOOK: Lineage: tjoin1.c1 SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin1.c2 EXPRESSION [(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin1.rnum SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ] +_col0 _col1 _col2 +PREHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin2stage +PREHOOK: Output: default@tjoin2 +POSTHOOK: query: INSERT 
INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin2stage +POSTHOOK: Output: default@tjoin2 +POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] +tjoin2stage.rnum tjoin2stage.c1 tjoin2stage.c2 +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 4 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + value expressions: _col0 (type: int), _col2 (type: int) + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 
(type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 4 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + FullOuterIntersect: true + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c1j2 c2j2 +0 10 15 10 BB +0 10 15 10 FF +1 20 25 NULL NULL +2 NULL 50 NULL NULL +NULL NULL NULL 15 DD +NULL NULL NULL NULL EE +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 4 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + value expressions: _col0 (type: int), _col2 
(type: int) + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 4 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + FullOuterIntersect: true + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c1j2 c2j2 +0 10 15 10 BB +0 10 15 10 FF +1 20 25 NULL NULL +2 NULL 50 NULL NULL +NULL NULL NULL 15 DD +NULL NULL NULL NULL EE +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + 
Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + bigTableValueExpressions: col 0:int, col 1:int, col 2:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 4 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:int + value expressions: _col0 (type: int), _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string] + Map 4 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink 
Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:int, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string] + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 1:int, col 0:int, col 2:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 4 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + FullOuterIntersect: true + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c1j2 c2j2 +0 10 15 10 BB +0 10 15 10 FF +1 20 25 NULL NULL +2 NULL 50 NULL NULL +NULL NULL NULL 15 DD +NULL NULL NULL NULL EE +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + bigTableValueExpressions: col 0:int, col 1:int, col 2:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 4 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:int + value expressions: _col0 (type: int), _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string] + Map 4 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + 
partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:int, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string] + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 1:int, col 0:int, col 2:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 4 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + FullOuterIntersect: true + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c1j2 c2j2 +0 10 15 10 BB +0 10 15 10 FF +1 20 25 NULL NULL +2 NULL 50 NULL NULL +NULL NULL NULL 15 DD +NULL NULL NULL NULL EE +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 
depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:int, 1:int, 2:int + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 4 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:int, 1:int, 2:int, 4:int, 5:char(2) + smallTableValueMapping: 5:char(2) + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 4 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:int + value expressions: _col0 (type: int), _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string] + Map 4 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + 
vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:int, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string] + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 1:int, 0:int, 2:int + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table 
vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 1:int, 0:int, 2:int, 3:int, 4:char(2) + smallTableValueMapping: 4:char(2) + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 4 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + FullOuterIntersect: true + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c1j2 c2j2 +0 10 15 10 BB +0 10 15 10 FF +1 20 25 NULL NULL +2 NULL 50 NULL NULL +NULL NULL NULL 15 DD +NULL NULL NULL NULL EE +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:int, 1:int, 2:int + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 4 + native: true + nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:int, 1:int, 2:int, 4:int, 5:char(2) + smallTableValueMapping: 5:char(2) + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 4 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:int + value expressions: _col0 (type: int), _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string] + Map 4 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 1:int + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:int, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string] + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 1:int, 0:int, 2:int + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 1:int, 0:int, 2:int, 3:int, 4:char(2) + smallTableValueMapping: 4:char(2) + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 4 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + FullOuterIntersect: true + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### 
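
Context for the repeated EXPLAIN output above: the golden file runs the same FULL OUTER JOIN query under several configurations, and the differences show up in the operator names. With vectorization off ("enabled: false, enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]") the plan uses a plain row-mode Map Join Operator. With vectorization on but native MapJoin off (note "nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false") the non-native VectorMapJoinOperator is chosen. With native MapJoin on, the new VectorMapJoinFullOuterLongOperator appears in Map 1 and VectorMapJoinFullOuterIntersectLongOperator (with "FullOuterIntersect: true") in Reducer 3, along with the new bigTableRetainColumnNums / fullOuterSmallTableKeyMapping / smallTableValueMapping annotations. The SET statements driving these variants live in the corresponding vector_full_outer_join.q file, which is not part of this hunk; as a hedged sketch reconstructed only from the plan annotations, they would look roughly like:

    -- Hypothetical toggles inferred from the plan output above; the actual
    -- .q file may order or combine them differently.
    set hive.vectorized.execution.enabled=false;                 -- row-mode Map Join Operator
    explain vectorization detail
    select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2
    from tjoin1 full outer join tjoin2 on (tjoin1.c1 = tjoin2.c1);

    set hive.vectorized.execution.enabled=true;
    set hive.vectorized.execution.mapjoin.native.enabled=false;  -- non-native VectorMapJoinOperator
    explain vectorization detail
    select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2
    from tjoin1 full outer join tjoin2 on (tjoin1.c1 = tjoin2.c1);

    set hive.vectorized.execution.mapjoin.native.enabled=true;   -- VectorMapJoinFullOuterLongOperator
    explain vectorization detail
    select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2
    from tjoin1 full outer join tjoin2 on (tjoin1.c1 = tjoin2.c1);
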
+POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c1j2 c2j2 +0 10 15 10 BB +0 10 15 10 FF +1 20 25 NULL NULL +2 NULL 50 NULL NULL +NULL NULL NULL 15 DD +NULL NULL NULL NULL EE +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:int + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 3 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: 
COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 +0 10 15 BB +0 10 15 FF +1 20 25 NULL +2 NULL 50 NULL +NULL NULL NULL DD +NULL NULL NULL EE +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + 
Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:int + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 3 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] + Select Operator + expressions: c1 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + 
Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 +0 10 15 +0 10 15 +1 20 25 +2 NULL 50 +NULL NULL NULL +NULL NULL NULL diff --git ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out new file mode 100644 index 0000000..b7e1d50 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out @@ -0,0 +1,8299 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: 
Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: Lineage: fullouter_long_small_1a.key SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a.s_date SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:s_date, 
type:date, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.key SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.s_date SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: 
default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: 
COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + 
enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 
+NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:bigint + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:bigint, 2:bigint, 3:date + smallTableValueMapping: 3:date + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:bigint, 3:date + Statistics: Num rows: 59 Data size: 
3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS 
true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:bigint + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:bigint, 1:bigint, 2:date + smallTableValueMapping: 2:date + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint, 2:date + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + 
Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a 
+#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 
+NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 
+NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze 
table fullouter_long_big_1b compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:smallint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:smallint + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:smallint, 2:smallint, 3:timestamp + smallTableValueMapping: 3:timestamp + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: 
smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:timestamp + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, timestamp] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:smallint, 1:s_timestamp:timestamp, 2:ROW__ID:struct] + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:timestamp + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:timestamp + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: vectorized, llap + LLAP 
IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:smallint, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:smallint, VALUE._col0:smallint, VALUE._col1:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, timestamp] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:smallint + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:smallint, 1:smallint, 2:timestamp + smallTableValueMapping: 2:timestamp + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 79 Data size: 2428 
Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:smallint, 2:timestamp + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 
2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: 
default@fullouter_long_big_1c +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:b_string:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:int, 1:string, 3:int, 4:decimal(38,18) + smallTableValueMapping: 4:decimal(38,18) + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: 
VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 3:int, 4:decimal(38,18) + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, b_string:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(38,18)] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:s_decimal:decimal(38,18), 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:decimal(38,18) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:decimal(38,18) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution 
mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string, VALUE._col1:int, VALUE._col2:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(38,18)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:int, 1:string, 2:int, 3:decimal(38,18) + smallTableValueMapping: 3:decimal(38,18) + 
hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:int, 3:decimal(38,18) + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL 
NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: 
query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12 Data 
size: 106 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:int, 2:int + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:int + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + 
Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:int, 1:int + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:int + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +POSTHOOK: 
type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: Lineage: fullouter_multikey_big_1a.key0 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a.key1 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key0 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key1 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields 
terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: Lineage: fullouter_multikey_small_1a.key0 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a.key1 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key0 SIMPLE 
[(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key1 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:smallint, 1:int + className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 3, 1 -> 4 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:smallint, 1:int, 3:smallint, 4:int + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez 
IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:smallint, 4:int + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: 
key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:smallint, 1:int + className: VectorMapJoinFullOuterIntersectMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 2, 1 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:smallint, 1:int, 2:smallint, 3:int + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 
-1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 
26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 
+NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 
23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: Lineage: fullouter_multikey_big_1b.key0 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key1 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key2 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE 
fullouter_multikey_small_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: Lineage: fullouter_multikey_small_1b.key0 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key1 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key2 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.s_decimal SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:s_decimal, type:decimal(38,18), comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +POSTHOOK: type: QUERY 
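[Editor's note — a condensed sketch, not part of the golden output.] The 1b multi-key tables above follow the same staging pattern as every other table in this test: load delimited text into a *_txt table, convert it to ORC with CTAS, then gather basic and column statistics so the planner has row counts when it picks the broadcast (small) side of the FULL OUTER MapJoin. Using only names recorded in the hooks above, the pattern condenses to:

    -- Stage delimited text, convert to ORC, and gather statistics.
    -- Table and file names are the ones in the PREHOOK/POSTHOOK output.
    CREATE TABLE fullouter_multikey_small_1b_txt(
        key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38,18))
      ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
    LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt'
      OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt;
    CREATE TABLE fullouter_multikey_small_1b STORED AS ORC
      AS SELECT * FROM fullouter_multikey_small_1b_txt;
    ANALYZE TABLE fullouter_multikey_small_1b_txt COMPUTE STATISTICS;
    ANALYZE TABLE fullouter_multikey_small_1b_txt COMPUTE STATISTICS FOR COLUMNS;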
+POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:ROW__ID:struct] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:timestamp, 1:smallint, 2:string + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:timestamp, 1:smallint, 2:string + className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 4, 1 -> 5, 2 -> 6 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:timestamp, 1:smallint, 2:string, 4:timestamp, 5:smallint, 6:string, 7:decimal(38,18) + smallTableValueMapping: 7:decimal(38,18) + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 5 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true + valueColumns: 2:string, 4:timestamp, 5:smallint, 6:string, 7:decimal(38,18) + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key0:timestamp, key1:smallint, key2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [timestamp, bigint, string, decimal(38,18)] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:s_decimal:decimal(38,18), 4:ROW__ID:struct] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:decimal(38,18) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:decimal(38,18) + Statistics: Num rows: 118 Data size: 28216 Basic 
stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2, 3] + dataColumns: key0:timestamp, key1:smallint, key2:string, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 7 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, VALUE._col0:string, VALUE._col1:timestamp, VALUE._col2:smallint, VALUE._col3:string, VALUE._col4:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, KEY.reducesinkkey2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [timestamp, bigint, string, decimal(38,18)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:timestamp, 1:smallint, 2:string + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:timestamp, 1:smallint, 2:string + className: 
VectorMapJoinFullOuterIntersectMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 3, 1 -> 4, 2 -> 5 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:timestamp, 1:smallint, 2:string, 3:timestamp, 4:smallint, 5:string, 6:decimal(38,18) + smallTableValueMapping: 6:decimal(38,18) + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 5 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:string, 3:timestamp, 4:smallint, 5:string, 6:decimal(38,18) + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL 
CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL 
NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 
WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: Lineage: fullouter_string_big_1a.key SIMPLE 
[(fullouter_string_big_1a_txt)fullouter_string_big_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: Lineage: fullouter_string_big_1a_nonull.key SIMPLE [(fullouter_string_big_1a_nonull_txt)fullouter_string_big_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_txt 
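[Editor's note — an illustrative sketch, not part of the golden output.] The string-key tables come in the same two flavors (with and without NULL keys) as the long and multi-key tables, and the result sets above show why: under plain equality a NULL join key never matches anything, so a big-table NULL key surfaces as a row padded with NULLs on the small side and vice versa, and only the _nonull x _nonull combination is free of such rows. A NULL-safe comparison behaves differently, and the plans above list "No nullsafe IS true" among nativeConditionsMet, so the second query below (hypothetical, shown for contrast) would not take the native vectorized MapJoin path:

    -- Plain equality: NULL keys never match, producing the NULL-padded
    -- rows seen in the FULL OUTER JOIN results above.
    SELECT b.key, s.key
    FROM fullouter_string_big_1a b
    FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key;

    -- NULL-safe equality (<=>) treats NULL <=> NULL as a match; because
    -- the native conditions require "No nullsafe IS true", this variant
    -- falls off the native vectorized MapJoin path.
    SELECT b.key, s.key
    FROM fullouter_string_big_1a b
    FULL OUTER JOIN fullouter_string_small_1a s ON b.key <=> s.key;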
+POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: Lineage: fullouter_string_small_1a.key SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_date SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_timestamp SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.key SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_date SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_timestamp SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: 
default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, 
s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinFullOuterStringOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:string, 2:string, 3:date, 4:timestamp + smallTableValueMapping: 3:date, 4:timestamp + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:string, 3:date, 4:timestamp + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + 
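[Editor's note — a minimal sketch, not part of the golden output.] The vectorization detail in this plan is the point of the test: the big-table side runs VectorMapJoinFullOuterStringOperator (and VectorMapJoinFullOuterIntersectStringOperator in the Reducer 4 branch below), with fullOuterSmallTableKeyMapping wiring small-table key columns into the output batch. Every entry in nativeConditionsMet is a precondition; flipping any listed setting pushes the join off the native FAST hash-table path. A minimal way to reproduce such a plan interactively, using only settings named in the conditions above:

    -- Settings named in enabledConditionsMet/nativeConditionsMet above,
    -- shown explicitly; all are required for the native path.
    SET hive.vectorized.execution.enabled=true;
    SET hive.vectorized.execution.mapjoin.native.enabled=true;
    SET hive.mapjoin.optimized.hashtable=true;

    EXPLAIN VECTORIZATION DETAIL
    SELECT b.key, s.key, s.s_date, s.s_timestamp
    FROM fullouter_string_big_1a b
    FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key
    ORDER BY b.key;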
allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:string, s_date:date, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinFullOuterIntersectStringOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:string, 1:string, 2:date, 3:timestamp + smallTableValueMapping: 2:date, 3:timestamp + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:date, 3:timestamp + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key 
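The DETAIL plan above is the heart of this golden file: the vectorized FULL OUTER MapJoin is split across two vertices that feed Union 2. Map 1 streams the big table through VectorMapJoinFullOuterStringOperator against the broadcast small table, while Reducer 4 receives the big-table keys over a shuffle edge and runs VectorMapJoinFullOuterIntersectStringOperator (note its FullOuterIntersect: true attribute) so the rows only the small table can contribute are produced once, in a single vertex; fullOuterSmallTableKeyMapping and smallTableValueMapping record how small-table keys and values are projected into the big-table batch columns. The nativeConditionsMet line enumerates everything that had to hold for the native operator to be chosen. A minimal q-file sketch that should reproduce this plan shape, built only from the settings named in that list (a sketch under those assumptions, not this test's actual header):

-- Sketch only: every setting below appears in the plan's nativeConditionsMet list.
set hive.execution.engine=tez;
set hive.vectorized.execution.enabled=true;
set hive.vectorized.execution.mapjoin.native.enabled=true;
set hive.mapjoin.optimized.hashtable=true;
set hive.vectorized.execution.reducesink.new.enabled=true;

EXPLAIN VECTORIZATION DETAIL
SELECT b.key, s.key, s.s_date, s.s_timestamp
FROM fullouter_string_big_1a b
FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key
ORDER BY b.key;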
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 
+NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 
2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: 
true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 
KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + className: VectorMapJoinFullOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL 
-2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution 
mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + className: VectorMapJoinFullOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 
2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 
(type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + className: VectorMapJoinFullOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN 
fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key 
(type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + className: VectorMapJoinFullOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + 
DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL 
-6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + 
expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint) + 1 KEY.reducesinkkey0 (type: smallint) + Map Join Vectorization: + className: VectorMapJoinFullOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 79 Data size: 2428 Basic 
stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 
07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 
-99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: 
VectorSelectOperator + native: true + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterIntersectLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 
1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: 
date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + className: VectorMapJoinFullOuterStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: 
query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select 
Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + className: VectorMapJoinFullOuterStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE 
Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 
2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select 
Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + className: VectorMapJoinFullOuterStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: 
VectorFileSinkOperator + native: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + 
enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer 
Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + className: VectorMapJoinFullOuterStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 
2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: 
vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce 
Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 
+NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + 
TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), 
VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL 
NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce 
Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: 
NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 
-1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No 
PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL 
OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, 
s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num 
rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + 1 KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + Map Join Vectorization: + className: VectorMapJoinFullOuterMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 4 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat 
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 
THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 
23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 
-16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL diff --git ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out new file mode 100644 index 0000000..a21c10d --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out @@ -0,0 +1,12905 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: 
default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: Lineage: fullouter_long_small_1a.key SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a.s_date SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: 
Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.key SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.s_date SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table 
fullouter_long_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + 
partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key 
+order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:bigint + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:bigint, 2:bigint, 3:date + smallTableValueMapping: 3:date + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:bigint, 3:date + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + 
scratchColumnTypeNames: [bigint, bigint] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:bigint + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:bigint, 1:bigint, 2:date + smallTableValueMapping: 2:date + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint, 2:date + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 
2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 
1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 
3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: CREATE 
TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b 
+POSTHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:smallint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:smallint + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:smallint, 2:smallint, 3:timestamp + smallTableValueMapping: 3:timestamp + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:timestamp + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, timestamp] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:smallint, 1:s_timestamp:timestamp, 2:ROW__ID:struct] + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:timestamp + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:timestamp + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:smallint, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:smallint, 
VALUE._col0:smallint, VALUE._col1:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, timestamp] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:smallint + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:smallint, 1:smallint, 2:timestamp + smallTableValueMapping: 2:timestamp + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:smallint, 2:timestamp + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: 
QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: 
database:default +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION 
DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:b_string:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:int, 1:string, 3:int, 4:decimal(38,18) + smallTableValueMapping: 4:decimal(38,18) + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 3:int, 4:decimal(38,18) + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + 
valueColumns: 1:string + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, b_string:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(38,18)] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:s_decimal:decimal(38,18), 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:decimal(38,18) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:decimal(38,18) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: 
KEY.reducesinkkey0:int, VALUE._col0:string, VALUE._col1:int, VALUE._col2:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(38,18)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:int, 1:string, 2:int, 3:decimal(38,18) + smallTableValueMapping: 3:decimal(38,18) + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:int, 3:decimal(38,18) + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: 
query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 
47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d 
+PREHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:int, 2:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort 
order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:int + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + 
allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:int, 1:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:int + Statistics: Num rows: 42 Data 
size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: Lineage: fullouter_multikey_big_1a.key0 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a.key1 SIMPLE 
[(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key0 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key1 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: 
default@fullouter_multikey_small_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: Lineage: fullouter_multikey_small_1a.key0 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a.key1 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key0 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key1 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +POSTHOOK: query: 
analyze table fullouter_multikey_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b 
FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:smallint, 1:int + className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 3, 1 -> 4 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:smallint, 1:int, 3:smallint, 4:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:smallint, 4:int + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + 
featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE 
Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:smallint, 1:int + className: VectorMapJoinFullOuterIntersectMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 2, 1 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:smallint, 1:int, 2:smallint, 3:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 
= s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b 
FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM 
fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: 
query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields 
terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: Lineage: fullouter_multikey_big_1b.key0 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key1 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key2 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: Lineage: 
fullouter_multikey_small_1b.key0 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key1 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key2 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.s_decimal SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:s_decimal, type:decimal(38,18), comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 
(BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:ROW__ID:struct] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:timestamp, 1:smallint, 2:string + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:timestamp, 1:smallint, 2:string + className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 4, 1 -> 5, 2 -> 6 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:timestamp, 1:smallint, 2:string, 4:timestamp, 5:smallint, 6:string, 7:decimal(38,18) + smallTableValueMapping: 7:decimal(38,18) + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 5 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:string, 4:timestamp, 5:smallint, 6:string, 7:decimal(38,18) + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + 
inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key0:timestamp, key1:smallint, key2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [timestamp, bigint, string, decimal(38,18)] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:s_decimal:decimal(38,18), 4:ROW__ID:struct] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:decimal(38,18) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:decimal(38,18) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2, 3] + dataColumns: key0:timestamp, key1:smallint, key2:string, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 7 + 
dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, VALUE._col0:string, VALUE._col1:timestamp, VALUE._col2:smallint, VALUE._col3:string, VALUE._col4:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, KEY.reducesinkkey2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [timestamp, bigint, string, decimal(38,18)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:timestamp, 1:smallint, 2:string + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:timestamp, 1:smallint, 2:string + className: VectorMapJoinFullOuterIntersectMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 3, 1 -> 4, 2 -> 5 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:timestamp, 1:smallint, 2:string, 3:timestamp, 4:smallint, 5:string, 6:decimal(38,18) + smallTableValueMapping: 6:decimal(38,18) + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 5 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: 
VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:string, 3:timestamp, 4:smallint, 5:string, 6:decimal(38,18) + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL 
NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 
XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 
14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: Lineage: fullouter_string_big_1a.key SIMPLE [(fullouter_string_big_1a_txt)fullouter_string_big_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE 
fullouter_string_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: Lineage: fullouter_string_big_1a_nonull.key SIMPLE [(fullouter_string_big_1a_nonull_txt)fullouter_string_big_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: Lineage: fullouter_string_small_1a.key SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_date SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_timestamp SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date 
date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.key SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_date SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_timestamp SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: 
default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: 
VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0]
+ Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Map Join Vectorization:
+ bigTableKeyColumns: 0:string
+ bigTableRetainColumnNums: [0]
+ bigTableValueColumns: 0:string
+ className: VectorMapJoinFullOuterStringOperator
+ fullOuterSmallTableKeyMapping: 0 -> 2
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+ projectedOutput: 0:string, 2:string, 3:date, 4:timestamp
+ smallTableValueMapping: 3:date, 4:timestamp
+ hashTableImplementationType: OPTIMIZED
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: 0:string
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: 2:string, 3:date, 4:timestamp
+ Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: 0:string
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ includeColumns: [0]
+ dataColumns: key:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [string, bigint, timestamp]
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct]
+ Select Operator
+ expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2]
+ Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkStringOperator
+ keyColumns: 0:string
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: 1:date, 2:timestamp
+ Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: date), _col2 (type: timestamp)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: 0:string
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: 1:date, 2:timestamp
+ Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: date), _col2 (type: timestamp)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ includeColumns: [0, 1, 2]
+ dataColumns: key:string, s_date:date, s_timestamp:timestamp
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 3]
+ Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ dataColumns: KEY.reducesinkkey0:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [string, bigint, timestamp]
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0]
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Map Join Vectorization:
+ bigTableKeyColumns: 0:string
+ bigTableRetainColumnNums: [0]
+ bigTableValueColumns: 0:string
+ className: VectorMapJoinFullOuterIntersectStringOperator
+ fullOuterSmallTableKeyMapping: 0 -> 1
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+ projectedOutput: 0:string, 1:string, 2:date, 3:timestamp
+ smallTableValueMapping: 2:date, 3:timestamp
+ hashTableImplementationType: OPTIMIZED
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE
+ FullOuterIntersect: true
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: 0:string
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: 1:string, 2:date, 3:timestamp
+ Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp)
+ Union 2
+ Vertex: Union 2
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_string_big_1a
+PREHOOK: Input: default@fullouter_string_small_1a
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_string_big_1a
+POSTHOOK: Input: default@fullouter_string_small_1a
+#### A masked pattern was here ####
+FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556
+MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592
+NULL 1985-01-22 2111-01-10 15:44:28
+NULL 2021-02-21 2802-04-21 18:48:18.5933838
+NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13
+NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35
+NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799
+NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558
+NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692
+NULL BEP 2141-02-19 2521-06-09 01:20:07.121
+NULL BEP 2206-08-10 2331-10-09 10:59:51
+NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094
+NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82
+NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647
+NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048
+NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738
+NULL FYW 1807-03-20
2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 
1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN 
fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 
(type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + className: VectorMapJoinFullOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column 
stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + className: VectorMapJoinFullOuterIntersectLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: 
default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 
(SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + className: VectorMapJoinFullOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 54 Data 
size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + className: VectorMapJoinFullOuterIntersectLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN 
[tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: 
query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + className: VectorMapJoinFullOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true 
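The long-key plans in this file share one shape: Map 1 runs VectorMapJoinFullOuterLongOperator over the big table with HybridGraceHashJoin: true, Reducer 4 re-joins the big-table keys through VectorMapJoinFullOuterIntersectLongOperator (FullOuterIntersect: true), and Union 2 merges the two streams before the ordered result is written. A minimal .q sketch that should reproduce this plan shape; it assumes the fullouter_long_* tables above already exist and that hive.mapjoin.hybridgrace.hashtable is the switch governing the hybrid grace path, while the remaining settings mirror the conditions listed under PLAN VECTORIZATION and nativeConditionsMet:

-- sketch only: settings taken from the nativeConditionsMet lines above;
-- hive.mapjoin.hybridgrace.hashtable is an assumption, not named in this output
set hive.execution.engine=tez;
set hive.vectorized.execution.enabled=true;
set hive.vectorized.execution.mapjoin.native.enabled=true;
set hive.mapjoin.optimized.hashtable=true;
set hive.mapjoin.hybridgrace.hashtable=true;

EXPLAIN VECTORIZATION OPERATOR
SELECT b.key, s.key, s.s_date
FROM fullouter_long_big_1a b
FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
ORDER BY b.key;

If any of the listed conditions stops holding, EXPLAIN VECTORIZATION OPERATOR reports it in the plan, which is what the surrounding golden output pins down.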
+ Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + className: VectorMapJoinFullOuterIntersectLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small 
table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 
2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + className: VectorMapJoinFullOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select 
Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + className: VectorMapJoinFullOuterIntersectLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 
214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + Map Join Vectorization: + className: VectorMapJoinFullOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 
(type: smallint), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: 
Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + Map Join Vectorization: + className: VectorMapJoinFullOuterIntersectLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 
12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + 
expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Reduce 
Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterIntersectLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 
15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: 
_col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce 
Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterIntersectLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL 
+-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + className: VectorMapJoinFullOuterStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: 
true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + className: VectorMapJoinFullOuterIntersectStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 
2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + className: VectorMapJoinFullOuterStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: 
VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + className: VectorMapJoinFullOuterIntersectStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 
1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + className: VectorMapJoinFullOuterStringOperator + native: true 
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: 
_col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + className: VectorMapJoinFullOuterIntersectStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN 
fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num 
rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + className: VectorMapJoinFullOuterStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 
(type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + className: VectorMapJoinFullOuterIntersectStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN 
[tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: 
QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column 
stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterIntersectMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin 
Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 
16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: 
int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterIntersectMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 
+-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), 
Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterIntersectMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + 
FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 
23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE 
Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink 
Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterIntersectMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL 
NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 
17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Map Join Vectorization: + className: VectorMapJoinFullOuterMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 5 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Reduce Sink 
Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Map Join Vectorization: + className: VectorMapJoinFullOuterIntersectMultiKeyOperator + native: true + nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 5 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 
ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 
2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 
XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + 
featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + className: VectorMapJoinFullOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: 
bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 
+NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + 
featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + className: VectorMapJoinFullOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### 
+-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: 
NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + className: VectorMapJoinFullOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: 
true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 
2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 
_col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + className: VectorMapJoinFullOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + 
+STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce 
Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint) + 1 KEY.reducesinkkey0 (type: smallint) + Map Join Vectorization: + className: VectorMapJoinFullOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 
19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + 
expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small 
table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL 
-1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN 
fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 
(type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterIntersectLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 
Data size: 419 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + 
Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + className: VectorMapJoinFullOuterStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL 
NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + 
Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + className: VectorMapJoinFullOuterStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + 
Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join 
Vectorization: + className: VectorMapJoinFullOuterStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 
2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + 
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + className: VectorMapJoinFullOuterStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce 
Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT 
b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + 
featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 
= s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = 
s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + 
usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 
NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 
<- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable 
IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL 
NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: 
VectorSelectOperator + native: true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 99 Data size: 792 
Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 
641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 
(type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + 1 KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + Map Join Vectorization: + className: VectorMapJoinFullOuterMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 4 + Statistics: Num rows: 129 
Data size: 31037 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 
07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 
15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 
-27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL diff --git ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out new file mode 100644 index 0000000..ddf5562 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out @@ -0,0 +1,12951 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: 
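Every table in this suite is staged the same way: a delimited-text staging table, a LOAD DATA from data/files, then a CTAS into ORC. A minimal sketch of the pattern, with a hypothetical table name demo standing in for the fullouter_long_* names used below:

  CREATE TABLE demo_txt(key bigint)
  row format delimited fields terminated by ',';
  LOAD DATA LOCAL INPATH '../../data/files/demo.txt' OVERWRITE INTO TABLE demo_txt;
  -- Re-materialize as ORC so scans can take the vectorized input format path.
  CREATE TABLE demo STORED AS ORC AS SELECT * FROM demo_txt;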
default@fullouter_long_big_1a_txt
+PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@fullouter_long_big_1a_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_big_1a
+POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@fullouter_long_big_1a_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_big_1a
+POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ]
+PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt
+POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt
+PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_big_1a_nonull
+POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_big_1a_nonull
+POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ]
+PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_small_1a_txt
+POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_small_1a_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@fullouter_long_small_1a_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@fullouter_long_small_1a_txt
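The ORC re-materialization above is what later lets these tables' scans report "LLAP IO: all inputs" with hive.vectorized.use.vectorized.input.format IS true, while the text-backed 1b tables near the end of this file vectorize through the row deserializer instead ("LLAP IO: no inputs", hive.vectorized.use.vector.serde.deserialize IS true). A hedged sketch of the switches the plans are reporting on; the values match the conditions the plans show as met, not settings this excerpt shows being issued:

  SET hive.vectorized.execution.enabled=true;            -- plan-level vectorization
  SET hive.vectorized.use.vectorized.input.format=true;  -- ORC scan path
  SET hive.vectorized.use.vector.serde.deserialize=true; -- delimited-text scan path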
+PREHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@fullouter_long_small_1a_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_small_1a
+POSTHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@fullouter_long_small_1a_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_small_1a
+POSTHOOK: Lineage: fullouter_long_small_1a.key SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ]
+POSTHOOK: Lineage: fullouter_long_small_1a.s_date SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ]
+PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt
+POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt
+PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@fullouter_long_small_1a_nonull_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_small_1a_nonull
+POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@fullouter_long_small_1a_nonull_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_small_1a_nonull
+POSTHOOK: Lineage: fullouter_long_small_1a_nonull.key SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ]
+POSTHOOK: Lineage: fullouter_long_small_1a_nonull.s_date SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ]
+PREHOOK: query: analyze table fullouter_long_big_1a compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_long_big_1a
+PREHOOK: Output: default@fullouter_long_big_1a
+POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_long_big_1a
+POSTHOOK: Output: default@fullouter_long_big_1a
+PREHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_long_big_1a
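Each table is analyzed twice, first for basic statistics and then for column statistics, so the planner has row counts and column-level stats available when it chooses and sizes the FULL OUTER join strategy. The repeated pattern, exactly as issued for each of the four 1a tables:

  analyze table fullouter_long_big_1a compute statistics;
  analyze table fullouter_long_big_1a compute statistics for columns;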
+PREHOOK: Output: default@fullouter_long_big_1a
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_long_big_1a
+POSTHOOK: Output: default@fullouter_long_big_1a
+#### A masked pattern was here ####
+PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_long_big_1a_nonull
+PREHOOK: Output: default@fullouter_long_big_1a_nonull
+POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_long_big_1a_nonull
+POSTHOOK: Output: default@fullouter_long_big_1a_nonull
+PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_long_big_1a_nonull
+PREHOOK: Output: default@fullouter_long_big_1a_nonull
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_long_big_1a_nonull
+POSTHOOK: Output: default@fullouter_long_big_1a_nonull
+#### A masked pattern was here ####
+PREHOOK: query: analyze table fullouter_long_small_1a compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_long_small_1a
+PREHOOK: Output: default@fullouter_long_small_1a
+POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_long_small_1a
+POSTHOOK: Output: default@fullouter_long_small_1a
+PREHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_long_small_1a
+PREHOOK: Output: default@fullouter_long_small_1a
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_long_small_1a
+POSTHOOK: Output: default@fullouter_long_small_1a
+#### A masked pattern was here ####
+PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_long_small_1a_nonull
+PREHOOK: Output: default@fullouter_long_small_1a_nonull
+POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_long_small_1a_nonull
+POSTHOOK: Output: default@fullouter_long_small_1a_nonull
+PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_long_small_1a_nonull
+PREHOOK: Output: default@fullouter_long_small_1a_nonull
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_long_small_1a_nonull
+POSTHOOK: Output: default@fullouter_long_small_1a_nonull
+#### A masked pattern was here ####
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
+order by b.key
+POSTHOOK:
type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + 
usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL 
-2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:bigint + bigTableValueExpressions: col 0:bigint + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types 
IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint, 2:date + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data 
size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:bigint + bigTableValueExpressions: col 0:bigint + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + 
className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint, 2:date + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: 
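The second EXPLAIN above is the shape this _optimized_passthru variant pins down: the Map Join Vectorization blocks report native: false with nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, so the row-mode VectorMapJoinOperator runs inside the vectorized pipeline, and the FULL OUTER result is assembled as a union (Union 2) of the broadcast join in Map 1 and a Reducer 4 pass marked FullOuterIntersect: true. A sketch of how one might reproduce that plan shape interactively; it assumes the session also enables whatever map-join conversion the first, merge-join plan lacked, which this excerpt does not show:

  SET hive.vectorized.execution.mapjoin.native.enabled=false;
  EXPLAIN VECTORIZATION DETAIL
  SELECT b.key, s.key, s.s_date
  FROM fullouter_long_big_1a b
  FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
  order by b.key;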
query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was 
here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL 
-2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table 
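The four result blocks above exercise the NULL-key matrix deliberately: a NULL join key compares as unknown, so NULL-keyed rows from either side can only survive as non-matches, and the _nonull variants remove exactly those rows. In miniature, assuming hypothetical one-column tables t1 (holding a single NULL) and t2 (holding a single 1):

  SELECT t1.key, t2.key
  FROM t1 FULL OUTER JOIN t2 ON t1.key = t2.key;
  -- both rows come back unmatched: (NULL, NULL) and (NULL, 1);
  -- a NULL key never equals anything, including another NULL.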
fullouter_long_big_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:smallint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:smallint + bigTableValueExpressions: col 0:smallint + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin 
Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:smallint, 2:timestamp + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, timestamp] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:smallint, 1:s_timestamp:timestamp, 2:ROW__ID:struct] + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:timestamp + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:timestamp + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:smallint, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:smallint, VALUE._col0:smallint, VALUE._col1:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, timestamp] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:smallint + bigTableValueExpressions: col 0:smallint + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, 
Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:smallint, 2:timestamp + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 
14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: 
default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:b_string:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:int, 3:decimal(38,18) + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, b_string:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(38,18)] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:s_decimal:decimal(38,18), 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:decimal(38,18) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:decimal(38,18) + Statistics: Num rows: 81 Data size: 1703 
Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string, VALUE._col1:int, VALUE._col2:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(38,18)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 
Map 5 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:int, 3:decimal(38,18) + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 
987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE 
fullouter_long_small_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full 
Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:int + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: 
VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + 
outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:int + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_txt +PREHOOK: Output: 
database:default +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: Lineage: fullouter_multikey_big_1a.key0 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a.key1 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key0 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key1 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was 
here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: Lineage: fullouter_multikey_small_1a.key0 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a.key1 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key0 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key1 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1a compute 
statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table 
fullouter_multikey_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:smallint, col 1:int + bigTableValueExpressions: col 0:smallint, col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int + 
partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:smallint, col 1:int + bigTableValueExpressions: col 0:smallint, col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY 
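NOTE: In the plan above, the FULL OUTER MapJoin runs in two branches that meet at Union 2: Map 1 map-joins the big table b against the broadcast small table s, while Reducer 4 (flagged FullOuterIntersect: true) receives the big-table join keys and map-joins them against the same broadcast input, which appears to be how the small-table-only rows are produced exactly once. A minimal HiveQL sketch of that decomposition, illustrative only — this is not the rewrite Hive performs internally:

    -- FULL OUTER JOIN expressed as a LEFT OUTER JOIN plus the unmatched
    -- small-table rows. NULL join keys never match, so they surface as
    -- NULL-padded rows on both sides, consistent with the results below.
    SELECT b.key0, b.key1, s.key0, s.key1
    FROM fullouter_multikey_big_1a b
    LEFT OUTER JOIN fullouter_multikey_small_1a s
      ON b.key0 = s.key0 AND b.key1 = s.key1
    UNION ALL
    SELECT CAST(NULL AS SMALLINT), CAST(NULL AS INT), s.key0, s.key1
    FROM fullouter_multikey_small_1a s
    WHERE NOT EXISTS (
      SELECT 1 FROM fullouter_multikey_big_1a b
      WHERE b.key0 = s.key0 AND b.key1 = s.key1
    );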
+PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND 
b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON 
b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER 
JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row 
format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: Lineage: fullouter_multikey_big_1b.key0 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key1 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key2 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: query: CREATE TABLE 
fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: Lineage: fullouter_multikey_small_1b.key0 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key1 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key2 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.s_decimal SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:s_decimal, type:decimal(38,18), comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: 
QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:ROW__ID:struct] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:timestamp, col 1:smallint, col 2:string + bigTableValueExpressions: col 0:timestamp, col 1:smallint, col 2:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 5 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:string, 3:timestamp, 4:smallint, 5:string, 6:decimal(38,18) + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key0:timestamp, key1:smallint, key2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [timestamp, bigint, string, decimal(38,18)] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:s_decimal:decimal(38,18), 4:ROW__ID:struct] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:decimal(38,18) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:decimal(38,18) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2, 3] + dataColumns: key0:timestamp, key1:smallint, key2:string, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: 
true + rowBatchContext: + dataColumnCount: 7 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, VALUE._col0:string, VALUE._col1:timestamp, VALUE._col2:smallint, VALUE._col3:string, VALUE._col4:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, KEY.reducesinkkey2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [timestamp, bigint, string, decimal(38,18)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:timestamp, col 1:smallint, col 2:string + bigTableValueExpressions: col 0:timestamp, col 1:smallint, col 2:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 5 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS 
true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:string, 3:timestamp, 4:smallint, 5:string, 6:decimal(38,18) + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 
726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 
PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 
-375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: Lineage: fullouter_string_big_1a.key SIMPLE [(fullouter_string_big_1a_txt)fullouter_string_big_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS 
SELECT * FROM fullouter_string_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: Lineage: fullouter_string_big_1a_nonull.key SIMPLE [(fullouter_string_big_1a_nonull_txt)fullouter_string_big_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: Lineage: fullouter_string_small_1a.key SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_date SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_timestamp SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: LOAD DATA 
LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.key SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_date SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_timestamp SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: 
Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: 
string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:string + bigTableValueExpressions: col 0:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:date, 3:timestamp + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 38 Data size: 6606 Basic stats: 
COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:string, s_date:date, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:string + 
bigTableValueExpressions: col 0:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:date, 3:timestamp + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 
08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: 
default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 
08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:bigint + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + 
HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS 
true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:bigint + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 
NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: 
+ 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:bigint + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + 
Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:bigint + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM 
fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked 
pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:bigint + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:bigint + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key 
expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: 
EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:bigint + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data 
size: 3264 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyExpressions: col 
0:bigint + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 
3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:smallint + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 79 Data size: 2428 Basic 
stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:smallint + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 
00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 173 
Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: 
true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No 
DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 
1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 
Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL 
+-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: 
VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: 
COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 
06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + 
Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull 
+POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full 
Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN 
IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), 
_col3 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 
(CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column 
stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN 
fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:smallint, col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + 
Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + 
condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:smallint, col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 
-1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:smallint, col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, 
Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + 
featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:smallint, col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: 
Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by 
b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:smallint, col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 
90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 
(type: int)
+ 1 _col0 (type: smallint), _col1 (type: int)
+ Map Join Vectorization:
+ bigTableKeyExpressions: col 0:smallint, col 1:int
+ className: VectorMapJoinOperator
+ native: false
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+ nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE
+ FullOuterIntersect: true
+ HybridGraceHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint), _col1 (type: int)
+ sort order: ++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: smallint), _col3 (type: int)
+ Union 2
+ Vertex: Union 2
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_multikey_big_1a
+PREHOOK: Input: default@fullouter_multikey_small_1a_nonull
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_multikey_big_1a
+POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull
+#### A masked pattern was here ####
+-17582	-1730236061	NULL	NULL
+-17582	1082230084	NULL	NULL
+-17582	267529350	-17582	267529350
+-17582	827141667	NULL	NULL
+-17582	9637312	NULL	NULL
+-18222	-1969080993	NULL	NULL
+-6131	-1969080993	-6131	-1969080993
+1499	371855128	NULL	NULL
+22767	-1969080993	NULL	NULL
+3556	-1969080993	NULL	NULL
+3556	NULL	NULL	NULL
+NULL	1082230084	NULL	NULL
+NULL	NULL	-11868	-3536499
+NULL	NULL	-11868	-915441041
+NULL	NULL	-11868	1052120431
+NULL	NULL	-11868	1318114822
+NULL	NULL	-11868	1456809245
+NULL	NULL	-11868	1658440922
+NULL	NULL	-11868	930596435
+NULL	NULL	-11868	97203778
+NULL	NULL	-12252	1956403781
+NULL	NULL	-12252	964377504
+NULL	NULL	-15212	-2055239583
+NULL	NULL	-17788	-1361776766
+NULL	NULL	-17788	-738743861
+NULL	NULL	-17788	-872691214
+NULL	NULL	-17788	528419995
+NULL	NULL	-1787	-63842445
+NULL	NULL	-20125	-1995259010
+NULL	NULL	-20900	1078466156
+NULL	NULL	-22311	-2055239583
+NULL	NULL	-23457	-63842445
+NULL	NULL	-2407	1078466156
+NULL	NULL	-24206	-1456409156
+NULL	NULL	-24206	641361618
+NULL	NULL	-26894	-63842445
+NULL	NULL	-28129	-2055239583
+NULL	NULL	-28137	-63842445
+NULL	NULL	-28313	-706104224
+NULL	NULL	-28313	51228026
+NULL	NULL	-28313	837320573
+NULL	NULL	-4117	-1386947816
+NULL	NULL	-5734	1078466156
+NULL	NULL	-6061	-586336015
+NULL	NULL	-7386	-1635102480
+NULL	NULL	-7386	-2112062470
+NULL	NULL	-7386	100736776
+NULL	NULL	-980	-270600267
+NULL	NULL	-980	-333603940
+NULL	NULL	-980	-465544127
+NULL	NULL	-980	-801821285
+NULL	NULL	-980	1310479628
+NULL	NULL	-980	2009785365
+NULL	NULL	-980	356970043
+NULL	NULL	-980	628784462
+NULL	NULL	-980	712692345
+NULL	NULL	11460	1078466156
+NULL	NULL	12089	-63842445
+NULL	NULL	13672	-63842445
+NULL	NULL	14400	-825652334
+NULL	NULL	15061	-63842445
+NULL	NULL	15404	1078466156
+NULL	NULL	16166	931172175
+NULL	NULL	16696	-63842445
+NULL	NULL	20156	-1618478138
+NULL	NULL	20156	1165375499
+NULL	NULL	20156	1855042153
+NULL	NULL	20156	963883665
+NULL	NULL	20969	-1995259010
+NULL	NULL	21186	-586336015
+NULL	NULL	22934	-1695419330
+NULL	NULL	23015	-1893013623
+NULL	NULL	23015	-217613200
+NULL	NULL	23015	-252525791
+NULL	NULL	23015	-276888585
+NULL	NULL	23015	-696928205
+NULL	NULL	23015	-893234501
+NULL	NULL	23015	258882280
+NULL	NULL	23015	564751472
+NULL	NULL	26738	-2055239583
+NULL	NULL	26944	-1995259010
+NULL	NULL	30353	-1007182618
+NULL	NULL	30353	-1011627089
+NULL	NULL	30353	-1507157031
+NULL	NULL	30353	105613996
+NULL	NULL	30353	1241923267
+NULL	NULL	30353	1364268303
+NULL	NULL	30353	2044473567
+NULL	NULL	31443	-1968665833
+NULL	NULL	3412	-1196037018
+NULL	NULL	3412	-1249487623
+NULL	NULL	3412	-2081156563
+NULL	NULL	3412	-2132472060
+NULL	NULL	3412	1253976194
+NULL	NULL	3890	1411429004
+NULL	NULL	4586	-586336015
+NULL	NULL	4779	-1995259010
+NULL	NULL	4902	1078466156
+NULL	NULL	5957	-1995259010
+NULL	NULL	8177	-1995259010
+NULL	NULL	NULL	NULL
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS)
+ Reducer 3 <- Union 2 (SIMPLE_EDGE)
+ Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key0 (type: smallint), key1 (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: smallint), _col1 (type: int)
+ 1 _col0 (type: smallint), _col1 (type: int)
+ Map Join Vectorization:
+ bigTableKeyExpressions: col 0:smallint, col 1:int
+ className: VectorMapJoinOperator
+ native: false
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+ nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint), _col1 (type: int)
+ sort order: ++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: smallint), _col3 (type: int)
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint), _col1 (type: int)
+ sort order: ++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key0 (type: smallint), key1 (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint), _col1 (type: int)
+ sort order: ++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: smallint), _col1 (type: int)
+ 1 _col0 (type: smallint), _col1 (type: int)
+ Map Join Vectorization:
+ bigTableKeyExpressions: col 0:smallint, col 1:int
+ className: VectorMapJoinOperator
+ native: false
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+ nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE
+ FullOuterIntersect: true
+ HybridGraceHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint), _col1 (type: int)
+ sort order: ++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: smallint), _col3 (type: int)
+ Union 2
+ Vertex: Union 2
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_multikey_big_1a_nonull
+PREHOOK: Input: default@fullouter_multikey_small_1a_nonull
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull
+POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull
+#### A masked pattern was here ####
+-17582	-1730236061	NULL	NULL
+-17582	1082230084	NULL	NULL
+-17582	267529350	-17582	267529350
+-17582	827141667	NULL	NULL
+-17582	9637312	NULL	NULL
+-18222	-1969080993	NULL	NULL
+-6131	-1969080993	-6131	-1969080993
+1499	371855128	NULL	NULL
+22767	-1969080993	NULL	NULL
+3556	-1969080993	NULL	NULL
+NULL	NULL	-11868	-3536499
+NULL	NULL	-11868	-915441041
+NULL	NULL	-11868	1052120431
+NULL	NULL	-11868	1318114822
+NULL	NULL	-11868	1456809245
+NULL	NULL	-11868	1658440922
+NULL	NULL	-11868	930596435
+NULL	NULL	-11868	97203778
+NULL	NULL	-12252	1956403781
+NULL	NULL	-12252	964377504
+NULL	NULL	-15212	-2055239583
+NULL	NULL	-17788	-1361776766
+NULL	NULL	-17788	-738743861
+NULL	NULL	-17788	-872691214
+NULL	NULL	-17788	528419995
+NULL	NULL	-1787	-63842445
+NULL	NULL	-20125	-1995259010
+NULL	NULL	-20900	1078466156
+NULL	NULL	-22311	-2055239583
+NULL	NULL	-23457	-63842445
+NULL	NULL	-2407	1078466156
+NULL	NULL	-24206	-1456409156
+NULL	NULL	-24206	641361618
+NULL	NULL	-26894	-63842445
+NULL	NULL	-28129	-2055239583
+NULL	NULL	-28137	-63842445
+NULL	NULL	-28313	-706104224
+NULL	NULL	-28313	51228026
+NULL	NULL	-28313	837320573
+NULL	NULL	-4117	-1386947816
+NULL	NULL	-5734	1078466156
+NULL	NULL	-6061	-586336015
+NULL	NULL	-7386	-1635102480
+NULL	NULL	-7386	-2112062470
+NULL	NULL	-7386	100736776
+NULL	NULL	-980	-270600267
+NULL	NULL	-980	-333603940
+NULL	NULL	-980	-465544127
+NULL	NULL	-980	-801821285
+NULL	NULL	-980	1310479628
+NULL	NULL	-980	2009785365
+NULL	NULL	-980	356970043
+NULL	NULL	-980	628784462
+NULL	NULL	-980	712692345
+NULL	NULL	11460	1078466156
+NULL	NULL	12089	-63842445
+NULL	NULL	13672	-63842445
+NULL	NULL	14400	-825652334
+NULL	NULL	15061	-63842445
+NULL	NULL	15404	1078466156
+NULL	NULL	16166	931172175
+NULL	NULL	16696	-63842445
+NULL	NULL	20156	-1618478138
+NULL	NULL	20156	1165375499
+NULL	NULL	20156	1855042153
+NULL	NULL	20156	963883665
+NULL	NULL	20969	-1995259010
+NULL	NULL	21186	-586336015
+NULL	NULL	22934	-1695419330
+NULL	NULL	23015	-1893013623
+NULL	NULL	23015	-217613200
+NULL	NULL	23015	-252525791
+NULL	NULL	23015	-276888585
+NULL	NULL	23015	-696928205
+NULL	NULL	23015	-893234501
+NULL	NULL	23015	258882280
+NULL	NULL	23015	564751472
+NULL	NULL	26738	-2055239583
+NULL	NULL	26944	-1995259010
+NULL	NULL	30353	-1007182618
+NULL	NULL	30353	-1011627089
+NULL	NULL	30353	-1507157031
+NULL	NULL	30353	105613996
+NULL	NULL	30353	1241923267
+NULL	NULL	30353	1364268303
+NULL	NULL	30353	2044473567
+NULL	NULL	31443	-1968665833
+NULL	NULL	3412	-1196037018
+NULL	NULL	3412	-1249487623
+NULL	NULL	3412	-2081156563
+NULL	NULL	3412	-2132472060
+NULL	NULL	3412	1253976194
+NULL	NULL	3890	1411429004
+NULL	NULL	4586	-586336015
+NULL	NULL	4779	-1995259010
+NULL	NULL	4902	1078466156
+NULL	NULL	5957	-1995259010
+NULL	NULL	8177	-1995259010
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2
+order by b.key0, b.key1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2
+order by b.key0, b.key1
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS)
+ Reducer 3 <- Union 2 (SIMPLE_EDGE)
+ Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string)
+ 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string)
+ Map Join Vectorization:
+ bigTableKeyExpressions: col 0:timestamp, col 1:smallint, col 2:string
+ className: VectorMapJoinOperator
+ native: false
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+ nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: timestamp), _col1 (type: smallint)
+ sort order: ++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18))
+ Reduce Output Operator
+ key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string)
+ sort order: +++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: decimal(38,18))
+ Reduce Output Operator
+ key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string)
+ sort order: +++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: decimal(38,18))
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string)
+ 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string)
+ Map Join Vectorization:
+ bigTableKeyExpressions: col 0:timestamp, col 1:smallint, col 2:string
+ className: VectorMapJoinOperator
+ native: false
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+ nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ input vertices:
+ 1 Map 5
+ Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE
+ FullOuterIntersect: true
+ HybridGraceHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: timestamp), _col1 (type: smallint)
+ sort order: ++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18))
+ Union 2
+ Vertex: Union 2
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2
+order by b.key0, b.key1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_multikey_big_1b
+PREHOOK: Input: default@fullouter_multikey_small_1b
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2
+order by b.key0, b.key1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_multikey_big_1b
+POSTHOOK: Input: default@fullouter_multikey_small_1b
+#### A masked pattern was here ####
+2061-12-19 22:10:32.000628309	21635	ANCO	NULL	NULL	NULL	NULL
+2082-07-14 04:00:40.695380469	12556	NCYBDW	NULL	NULL	NULL	NULL
+2093-04-10 23:36:54.846	1446	GHZVPWFO	NULL	NULL	NULL	NULL
+2093-04-10 23:36:54.846	28996	Q	NULL	NULL	NULL	NULL
+2093-04-10 23:36:54.846	NULL	NULL	NULL	NULL	NULL	NULL
+2188-06-04 15:03:14.963259704	9468	AAA	2188-06-04 15:03:14.963259704	9468	AAA	2.754963520000000000
+2299-11-15 16:41:30.401	-31077	NCYBDW	NULL	NULL	NULL	NULL
+2306-06-21 11:02:00.143124239	-6909	NCYBDW	NULL	NULL	NULL	NULL
+2306-06-21 11:02:00.143124239	1446	NULL	NULL	NULL	NULL	NULL
+2608-02-23 23:44:02.546440891	26184	NCYBDW	NULL	NULL	NULL	NULL
+2686-05-23 07:46:46.565832918	13212	NCYBDW	2686-05-23 07:46:46.565832918	13212	NCYBDW	-917116793.400000000000000000
+2686-05-23 07:46:46.565832918	NULL	GHZVPWFO	NULL	NULL	NULL	NULL
+2898-10-01 22:27:02.000871113	10361	NCYBDW	NULL	NULL	NULL	NULL
+NULL	-6909	NULL	NULL	NULL	NULL	NULL
+NULL	21635	ANCO	NULL	NULL	NULL	NULL
+NULL	NULL	CCWYD	NULL	NULL	NULL	NULL
+NULL	NULL	NULL	1905-04-20 13:42:24.000469776	2638	KAUUFF	7.000000000000000000
+NULL	NULL	NULL	1919-06-20 00:16:50.611028595	20223	ZKBC	-23.000000000000000000
+NULL	NULL	NULL	1931-12-04 11:13:47.269597392	23196	HVJCQMTQL	-9697532.899400000000000000
+NULL	NULL	NULL	1941-10-16 02:19:35.000423663	-24459	AO	-821445414.457971200000000000
+NULL	NULL	NULL	1957-02-01 14:00:28.000548421	-16085	ZVEUKC	-2312.814900000000000000
+NULL	NULL	NULL	1957-03-06 09:57:31	-26373	NXLNNSO	2.000000000000000000
+NULL	NULL	NULL	1980-09-13 19:57:15	NULL	M	57650.772300000000000000
+NULL	NULL	NULL	2018-11-25 22:27:55.84	-12202	VBDBM	7506645.953700000000000000
+NULL	NULL	NULL	2018-11-25 22:27:55.84	-12202	VBDBM	98790.713907420831000000
+NULL	NULL	NULL	2018-11-25 22:27:55.84	-22419	LOTLS	342.372604022858400000
+NULL	NULL	NULL	2038-10-12 09:15:33.000539653	-19598	YKNIAJW	-642807895924.660000000000000000
+NULL	NULL	NULL	2044-05-02 07:00:03.35	-8751	ZSMB	-453797242.029791752000000000
+NULL	NULL	NULL	2071-07-21 20:02:32.000250697	2638	NRUV	-66198.351092000000000000
+NULL	NULL	NULL	2073-03-21 15:32:57.617920888	26425	MPRACIRYW	5.000000000000000000
+NULL	NULL	NULL	2073-03-21 15:32:57.617920888	26425	MPRACIRYW	726945733.419300000000000000
+NULL	NULL	NULL	2075-10-25 20:32:40.000792874	NULL	NULL	226612651968.360760000000000000
+NULL	NULL	NULL	2083-06-07 09:35:19.383	-26373	MR	-394.086700000000000000
+NULL	NULL	NULL	2083-06-07 09:35:19.383	-26373	MR	67892053.023760940000000000
+NULL	NULL	NULL	2086-04-09 00:03:10	20223	THXNJGFFV	-85184687349898.892000000000000000
+NULL	NULL	NULL	2086-04-09 00:03:10	20223	THXNJGFFV	0.439686100000000000
+NULL	NULL	NULL	2086-04-09 00:03:10	20223	THXNJGFFV	482.538341135921900000
+NULL	NULL	NULL	2105-01-04 16:27:45	23100	ZSMB	-83.232800000000000000
+NULL	NULL	NULL	2145-10-15 06:58:42.831	2638	NULL	-9784.820000000000000000
+NULL	NULL	NULL	2145-10-15 06:58:42.831	2638	UANGISEXR	-5996.306000000000000000
+NULL	NULL	NULL	2169-04-02 06:30:32	23855	PDVQATOS	-1515597428.000000000000000000
+NULL	NULL	NULL	2169-04-02 06:30:32	23855	PDVQATOS	-4016.960800000000000000
+NULL	NULL	NULL	2201-07-05 17:22:06.084206844	-24459	UBGT	1.506948328200000000
+NULL	NULL	NULL	2238-05-17 19:27:25.519	20223	KQCM	-0.010950000000000000
+NULL	NULL	NULL	2242-08-04 07:51:46.905	20223	UCYXACQ	-0.261490000000000000
+NULL	NULL	NULL	2242-08-04 07:51:46.905	20223	UCYXACQ	37.728800000000000000
+NULL	NULL	NULL	2266-09-26 06:27:29.000284762	20223	EDYJJN	14.000000000000000000
+NULL	NULL	NULL	2301-06-03 17:16:19	15332	ZVEUKC	0.500000000000000000
+NULL	NULL	NULL	2304-12-15 15:31:16	-13125	JFYW	6.086657000000000000
+NULL	NULL	NULL	2304-12-15 15:31:16	11101	YJCKKCR	-0.200000000000000000
+NULL	NULL	NULL	2304-12-15 15:31:16	11101	YJCKKCR	-0.500000000000000000
+NULL	NULL	NULL	2304-12-15 15:31:16	11101	YJCKKCR	1279917802.420000000000000000
+NULL	NULL	NULL	2304-12-15 15:31:16	12587	OPW	-4.594895040000000000
+NULL	NULL	NULL	2304-12-15 15:31:16	1301	T	-0.800000000000000000
+NULL	NULL	NULL	2304-12-15 15:31:16	1301	T	2720.800000000000000000
+NULL	NULL	NULL	2304-12-15 15:31:16	1301	T	61.302000000000000000
+NULL	NULL	NULL	2304-12-15 15:31:16	15090	G	-4319470286240016.300000000000000000
+NULL	NULL	NULL	2304-12-15 15:31:16	15090	G	975.000000000000000000
+NULL	NULL	NULL	2304-12-15 15:31:16	30285	GSJPSIYOU	0.200000000000000000
+NULL	NULL	NULL	2304-12-15 15:31:16	8650	RLNO	-0.435500000000000000
+NULL	NULL	NULL	2304-12-15 15:31:16	8650	RLNO	0.713517473350000000
+NULL	NULL	NULL	2309-01-15 12:43:49	22821	ZMY	40.900000000000000000
+NULL	NULL	NULL	2332-06-14 07:02:42.32	-26373	XFFFDTQ	56845106806308.900000000000000000
+NULL	NULL	NULL	2333-07-28 09:59:26	23196	RKSK	37872288434740893.500000000000000000
+NULL	NULL	NULL	2338-02-12 09:30:07	20223	CTH	-6154.763054000000000000
+NULL	NULL	NULL	2340-12-15 05:15:17.133588982	23663	HHTP	33383.800000000000000000
+NULL	NULL	NULL	2355-09-23 19:52:34.638084141	-19598	H	74179461.880493000000000000
+NULL	NULL	NULL	2355-09-23 19:52:34.638084141	-19598	H	92.150000000000000000
+NULL	NULL	NULL	2357-05-08 07:09:09.000482799	6226	ZSMB	-32.460000000000000000
+NULL	NULL	NULL	2357-05-08 07:09:09.000482799	6226	ZSMB	-472.000000000000000000
+NULL	NULL	NULL	2391-01-17 15:28:37.00045143	16160	ZVEUKC	771355639420297.133000000000000000
+NULL	NULL	NULL	2396-04-06 15:39:02.404013577	29661	ZSMB	-5151598.347000000000000000
+NULL	NULL	NULL	2396-04-06 15:39:02.404013577	29661	ZSMB	0.767183260000000000
+NULL	NULL	NULL	2409-09-23 10:33:27	2638	XSXR	-162.950000000000000000
+NULL	NULL	NULL	2409-09-23 10:33:27	2638	XSXR	-9926693851.000000000000000000
+NULL	NULL	NULL	2409-09-23 10:33:27	2638	XSXR	0.400000000000000000
+NULL	NULL	NULL	2410-05-03 13:44:56	2638	PHOR	-769088.176482000000000000
+NULL	NULL	NULL	2410-05-03 13:44:56	2638	PHOR	93262.914526611000000000
+NULL	NULL	NULL	2461-03-09 09:54:45.000982385	-16454	ZSMB	-9575827.553960000000000000
+NULL	NULL	NULL	2461-03-09 09:54:45.000982385	-16454	ZSMB	-991.436050000000000000
+NULL	NULL	NULL	2461-03-09 09:54:45.000982385	-16454	ZSMB	8694.890000000000000000
+NULL	NULL	NULL	2462-12-16 23:11:32.633305644	-26373	CB	-582687.000000000000000000
+NULL	NULL	NULL	2462-12-16 23:11:32.633305644	-26373	CB	67.417990000000000000
+NULL	NULL	NULL	2467-05-11 06:04:13.426693647	23196	EIBSDASR	-8.554888380100000000
+NULL	NULL	NULL	2480-10-02 09:31:37.000770961	-26373	NBN	-5875.519725200000000000
+NULL	NULL	NULL	2512-10-06 03:03:03	-3465	VZQ	-49.512190000000000000
+NULL	NULL	NULL	2512-10-06 03:03:03	-3465	VZQ	0.445800000000000000
+NULL	NULL	NULL	2512-10-06 03:03:03	13195	CRJ	14.000000000000000000
+NULL	NULL	NULL	2512-10-06 03:03:03	1560	X	-922.695158410700000000
+NULL	NULL	NULL	2512-10-06 03:03:03	1560	X	761196.522000000000000000
+NULL	NULL	NULL	2512-10-06 03:03:03	24313	QBHUG	-8423.151573236000000000
+NULL	NULL	NULL	2512-10-06 03:03:03	32099	ARNZ	-0.410000000000000000
+NULL	NULL	NULL	2525-05-12 15:59:35	-24459	SAVRGA	53106747151.863300000000000000
+NULL	NULL	NULL	2535-03-01 05:04:49.000525883	23663	ALIQKNXHE	-0.166569100000000000
+NULL	NULL	NULL	2629-04-07 01:54:11	-6776	WGGFVFTW	41.774515077866460000
+NULL	NULL	NULL	2629-04-07 01:54:11	-6776	WGGFVFTW	6.801285170800000000
+NULL	NULL	NULL	2637-03-12 22:25:46.385	-12923	PPTJPFR	5.400000000000000000
+NULL	NULL	NULL	2637-03-12 22:25:46.385	-17786	HYEGQ	-84.169614329419000000
+NULL	NULL	NULL	2637-03-12 22:25:46.385	21841	CXTI	7362887891522.378200000000000000
+NULL	NULL	NULL	2637-03-12 22:25:46.385	21841	CXTI	749563668434009.650000000000000000
+NULL	NULL	NULL	2668-06-25 07:12:37.000970744	2638	TJE	-2.779682700000000000
+NULL	NULL	NULL	2688-02-06 20:58:42.000947837	20223	PAIY	67661.735000000000000000
+NULL	NULL	NULL	2743-12-27 05:16:19.000573579	-12914	ZVEUKC	-811984611.517849700000000000
+NULL	NULL	NULL	2759-11-26 22:19:55.410967136	-27454	ZMY	368.000000000000000000
+NULL	NULL	NULL	2759-11-26 22:19:55.410967136	-27454	ZMY	60.602579700000000000
+NULL	NULL	NULL	2808-07-09 02:10:11.928498854	-19598	FHFX	0.300000000000000000
+NULL	NULL	NULL	2829-06-04 08:01:47.836	22771	ZVEUKC	94317.753180000000000000
+NULL	NULL	NULL	2861-05-27 07:13:01.000848622	-19598	WKPXNLXS	29399.000000000000000000
+NULL	NULL	NULL	2882-05-20 07:21:25.221299462	23196	U	-4244.926206619000000000
+NULL	NULL	NULL	2882-05-20 07:21:25.221299462	23196	U	-9951044.000000000000000000
+NULL	NULL	NULL	2888-05-08 08:36:55.182302102	5786	ZVEUKC	-56082455.033918000000000000
+NULL	NULL	NULL	2888-05-08 08:36:55.182302102	5786	ZVEUKC	57.621752577880370000
+NULL	NULL	NULL	2897-08-10 15:21:47.09	23663	XYUVBED	51.732330327300000000
+NULL	NULL	NULL	2897-08-10 15:21:47.09	23663	XYUVBED	6370.000000000000000000
+NULL	NULL	NULL	2898-12-18 03:37:17	-24459	MHNBXPBM	14.236693562384810000
+NULL	NULL	NULL	2913-07-17 15:06:58.041	-10206	NULL	-0.200000000000000000
+NULL	NULL	NULL	2938-12-21 23:35:59.498	29362	ZMY	0.880000000000000000
+NULL	NULL	NULL	2957-05-07 10:41:46	20223	OWQT	-586953.153681000000000000
+NULL	NULL	NULL	2960-04-12 07:03:42.000366651	20340	CYZYUNSF	-96.300000000000000000
+NULL	NULL	NULL	2960-04-12 07:03:42.000366651	20340	CYZYUNSF	2.157765900000000000
+NULL	NULL	NULL	2969-01-23 14:08:04.000667259	-18138	VDPN	8924831210.427680190000000000
+NULL	NULL	NULL	2969-01-23 14:08:04.000667259	-32485	AGEPWWLJF	-48431309405.652522000000000000
+NULL	NULL	NULL	2969-01-23 14:08:04.000667259	-8913	UIMQ	-375994644577.315257000000000000
+NULL	NULL	NULL	2969-01-23 14:08:04.000667259	-8913	UIMQ	-81.000000000000000000
+NULL	NULL	NULL	2969-01-23 14:08:04.000667259	-8913	UIMQ	9.178000000000000000
+NULL	NULL	NULL	2969-01-23 14:08:04.000667259	14500	WXLTRFQP	-23.819800000000000000
+NULL	NULL	NULL	2969-01-23 14:08:04.000667259	6689	TFGVOGPJF	-0.010000000000000000
+NULL	NULL	NULL	2971-02-14 09:13:19	-16605	BVACIRP	-27394351.300000000000000000
+NULL	NULL	NULL	2971-02-14 09:13:19	-16605	BVACIRP	-5.751278023000000000
+NULL	NULL	NULL	NULL	-12914	ZVEUKC	221.000000000000000000
+NULL	NULL	NULL	NULL	NULL	NULL	-2.400000000000000000
+NULL	NULL	NULL	NULL	NULL	NULL	-2207.300000000000000000
+NULL	NULL	NULL	NULL	NULL	NULL	NULL
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key (type: bigint)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key (type: bigint), s_date (type: date)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: date)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: bigint)
+ 1 KEY.reducesinkkey0 (type: bigint)
+ Map Join Vectorization:
+ bigTableKeyExpressions: col 0:bigint
+ className: VectorMapJoinOperator
+ native: false
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+ nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+ outputColumnNames: _col0, _col1, _col2
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: date)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_long_big_1a
+PREHOOK: Input: default@fullouter_long_small_1a
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_long_big_1a
+POSTHOOK: Input: default@fullouter_long_small_1a
+#### A masked pattern was here ####
+-5206670856103795573	NULL	NULL
+-5310365297525168078	NULL	NULL
+-6187919478609154811	NULL	NULL
+-6187919478609154811	NULL	NULL
+-6187919478609154811	NULL	NULL
+-6187919478609154811	NULL	NULL
+-8460550397108077433	NULL	NULL
+1569543799237464101	NULL	NULL
+3313583664488247651	NULL	NULL
+968819023021777205	NULL	NULL
+NULL	-1339636982994067311	2000-06-20
+NULL	-1339636982994067311	2008-12-03
+NULL	-2098090254092150988	1817-03-12
+NULL	-2098090254092150988	2163-05-26
+NULL	-2098090254092150988	2219-12-23
+NULL	-2184423060953067642	1853-07-06
+NULL	-2184423060953067642	1880-10-06
+NULL	-2575185053386712613	1809-07-12
+NULL	-2575185053386712613	2105-01-21
+NULL	-2688622006344936758	1948-10-15
+NULL	-2688622006344936758	2129-01-11
+NULL	-327698348664467755	2222-10-15
+NULL	-3655445881497026796	2108-08-16
+NULL	-4224290881682877258	1813-05-17
+NULL	-4224290881682877258	2120-01-16
+NULL	-4224290881682877258	2185-07-08
+NULL	-4961171400048338491	2196-08-10
+NULL	-5706981533666803767	1800-09-20
+NULL	-5706981533666803767	2151-06-09
+NULL	-5754527700632192146	1958-07-15
+NULL	-614848861623872247	2101-05-25
+NULL	-614848861623872247	2112-11-09
+NULL	-6784441713807772877	1845-02-16
+NULL	-6784441713807772877	2054-06-17
+NULL	-7707546703881534780	2134-08-20
+NULL	214451696109242839	1855-05-12
+NULL	214451696109242839	1977-01-04
+NULL	214451696109242839	2179-04-18
+NULL	2438535236662373438	1881-09-16
+NULL	2438535236662373438	1916-01-10
+NULL	2438535236662373438	2026-06-23
+NULL	3845554233155411208	1805-11-10
+NULL	3845554233155411208	2264-04-05
+NULL	3873405809071478736	1918-11-20
+NULL	3873405809071478736	2034-06-09
+NULL	3873405809071478736	2164-04-23
+NULL	3905351789241845882	1866-07-28
+NULL	3905351789241845882	2045-12-05
+NULL	434940853096155515	2275-02-08
+NULL	4436884039838843341	2031-05-23
+NULL	5246983111579595707	1817-07-01
+NULL	5246983111579595707	2260-05-11
+NULL	5252407779338300447	2039-03-10
+NULL	5252407779338300447	2042-04-26
+NULL	6049335087268933751	2086-12-17
+NULL	6049335087268933751	2282-06-09
+NULL	7297177530102477725	1921-05-11
+NULL	7297177530102477725	1926-04-12
+NULL	7297177530102477725	2125-08-26
+NULL	7937120928560087303	2083-03-14
+NULL	8755921538765428593	1827-05-01
+NULL	NULL	2024-01-23
+NULL	NULL	2098-02-10
+NULL	NULL	2242-02-08
+NULL	NULL	NULL
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key (type: bigint)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key (type: bigint), s_date (type: date)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: date)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: bigint)
+ 1 KEY.reducesinkkey0 (type: bigint)
+ Map Join Vectorization:
+ bigTableKeyExpressions: col 0:bigint
+ className: VectorMapJoinOperator
+ native: false
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+ nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+ outputColumnNames: _col0, _col1, _col2
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: date)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_long_big_1a_nonull
+PREHOOK: Input: default@fullouter_long_small_1a
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_long_big_1a_nonull
+POSTHOOK: Input: default@fullouter_long_small_1a
+#### A masked pattern was here ####
+-5206670856103795573	NULL	NULL
+-5310365297525168078	NULL	NULL
+-6187919478609154811	NULL	NULL
+-6187919478609154811	NULL	NULL
+-6187919478609154811	NULL	NULL
+-6187919478609154811	NULL	NULL
+-8460550397108077433	NULL	NULL
+1569543799237464101	NULL	NULL
+3313583664488247651	NULL	NULL
+968819023021777205	NULL	NULL
+NULL	-1339636982994067311	2000-06-20
+NULL	-1339636982994067311	2008-12-03
+NULL	-2098090254092150988	1817-03-12
+NULL	-2098090254092150988	2163-05-26
+NULL	-2098090254092150988	2219-12-23
+NULL	-2184423060953067642	1853-07-06
+NULL	-2184423060953067642	1880-10-06
+NULL	-2575185053386712613	1809-07-12
+NULL	-2575185053386712613	2105-01-21
+NULL	-2688622006344936758	1948-10-15
+NULL	-2688622006344936758	2129-01-11
+NULL	-327698348664467755	2222-10-15
+NULL	-3655445881497026796	2108-08-16
+NULL	-4224290881682877258	1813-05-17
+NULL	-4224290881682877258	2120-01-16
+NULL	-4224290881682877258	2185-07-08
+NULL	-4961171400048338491	2196-08-10
+NULL	-5706981533666803767	1800-09-20
+NULL	-5706981533666803767	2151-06-09
+NULL	-5754527700632192146	1958-07-15
+NULL	-614848861623872247	2101-05-25
+NULL	-614848861623872247	2112-11-09
+NULL	-6784441713807772877	1845-02-16
+NULL	-6784441713807772877	2054-06-17
+NULL	-7707546703881534780	2134-08-20
+NULL	214451696109242839	1855-05-12
+NULL	214451696109242839	1977-01-04
+NULL	214451696109242839	2179-04-18
+NULL	2438535236662373438	1881-09-16
+NULL	2438535236662373438	1916-01-10
+NULL	2438535236662373438	2026-06-23
+NULL	3845554233155411208	1805-11-10
+NULL	3845554233155411208	2264-04-05
+NULL	3873405809071478736	1918-11-20
+NULL	3873405809071478736	2034-06-09
+NULL	3873405809071478736	2164-04-23
+NULL	3905351789241845882	1866-07-28
+NULL	3905351789241845882	2045-12-05
+NULL	434940853096155515	2275-02-08
+NULL	4436884039838843341	2031-05-23
+NULL	5246983111579595707	1817-07-01
+NULL	5246983111579595707	2260-05-11
+NULL	5252407779338300447	2039-03-10
+NULL	5252407779338300447	2042-04-26
+NULL	6049335087268933751	2086-12-17
+NULL	6049335087268933751	2282-06-09
+NULL	7297177530102477725	1921-05-11
+NULL	7297177530102477725	1926-04-12
+NULL	7297177530102477725	2125-08-26
+NULL	7937120928560087303	2083-03-14
+NULL	8755921538765428593	1827-05-01
+NULL	NULL	2024-01-23
+NULL	NULL	2098-02-10
+NULL	NULL	2242-02-08
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key (type: bigint)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key (type: bigint), s_date (type: date)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: date)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: bigint)
+ 1 KEY.reducesinkkey0 (type: bigint)
+ Map Join Vectorization:
+ bigTableKeyExpressions: col 0:bigint
+ className: VectorMapJoinOperator
+ native: false
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+ nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+ outputColumnNames: _col0, _col1, _col2
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: date)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_long_big_1a
+PREHOOK: Input: default@fullouter_long_small_1a_nonull
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_long_big_1a
+POSTHOOK: Input: default@fullouter_long_small_1a_nonull
+#### A masked pattern was here ####
+-5206670856103795573	NULL	NULL
+-5310365297525168078	NULL	NULL
+-6187919478609154811	NULL	NULL
+-6187919478609154811	NULL	NULL
+-6187919478609154811	NULL	NULL
+-6187919478609154811	NULL	NULL
+-8460550397108077433	NULL	NULL
+1569543799237464101	NULL	NULL
+3313583664488247651	NULL	NULL
+968819023021777205	NULL	NULL
+NULL	-1339636982994067311	2000-06-20
+NULL	-1339636982994067311	2008-12-03
+NULL	-2098090254092150988	1817-03-12
+NULL	-2098090254092150988	2163-05-26
+NULL	-2098090254092150988	2219-12-23
+NULL	-2184423060953067642	1853-07-06
+NULL	-2184423060953067642	1880-10-06
+NULL	-2575185053386712613	1809-07-12
+NULL	-2575185053386712613	2105-01-21
+NULL	-2688622006344936758	1948-10-15
+NULL	-2688622006344936758	2129-01-11
+NULL	-327698348664467755	2222-10-15
+NULL	-3655445881497026796	2108-08-16
+NULL	-4224290881682877258	1813-05-17
+NULL	-4224290881682877258	2120-01-16
+NULL	-4224290881682877258	2185-07-08
+NULL	-4961171400048338491	2196-08-10
+NULL	-5706981533666803767	1800-09-20
+NULL	-5706981533666803767	2151-06-09
+NULL	-5754527700632192146	1958-07-15
+NULL	-614848861623872247	2101-05-25
+NULL	-614848861623872247	2112-11-09
+NULL	-6784441713807772877	1845-02-16
+NULL	-6784441713807772877	2054-06-17
+NULL	-7707546703881534780	2134-08-20
+NULL	214451696109242839	1855-05-12
+NULL	214451696109242839	1977-01-04
+NULL	214451696109242839	2179-04-18
+NULL	2438535236662373438	1881-09-16
+NULL	2438535236662373438	1916-01-10
+NULL	2438535236662373438	2026-06-23
+NULL	3845554233155411208	1805-11-10
+NULL	3845554233155411208	2264-04-05
+NULL	3873405809071478736	1918-11-20
+NULL	3873405809071478736	2034-06-09
+NULL	3873405809071478736	2164-04-23
+NULL	3905351789241845882	1866-07-28
+NULL	3905351789241845882	2045-12-05
+NULL	434940853096155515	2275-02-08
+NULL	4436884039838843341	2031-05-23
+NULL	5246983111579595707	1817-07-01
+NULL	5246983111579595707	2260-05-11
+NULL	5252407779338300447	2039-03-10
+NULL	5252407779338300447	2042-04-26
+NULL	6049335087268933751	2086-12-17
+NULL	6049335087268933751	2282-06-09
+NULL	7297177530102477725	1921-05-11
+NULL	7297177530102477725	1926-04-12
+NULL	7297177530102477725	2125-08-26
+NULL	7937120928560087303	2083-03-14
+NULL	8755921538765428593	1827-05-01
+NULL	NULL	NULL
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key (type: bigint)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key (type: bigint), s_date (type: date)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: date)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: bigint)
+ 1 KEY.reducesinkkey0 (type: bigint)
+ Map Join Vectorization:
+ bigTableKeyExpressions: col 0:bigint
+ className: VectorMapJoinOperator
+ native: false
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+ nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+ outputColumnNames: _col0, _col1, _col2
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark]
IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 
3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + 
Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint) + 1 KEY.reducesinkkey0 (type: smallint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:smallint + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 
16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + 
Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN 
fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 
1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 
42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + 
className: VectorFileSinkOperator + native: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 
436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN 
fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 12 Data size: 
1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: 
true
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_string_big_1a_nonull
+PREHOOK: Input: default@fullouter_string_small_1a
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_string_big_1a_nonull
+POSTHOOK: Input: default@fullouter_string_small_1a
+#### A masked pattern was here ####
+FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556
+MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592
+NULL 1985-01-22 2111-01-10 15:44:28
+NULL 2021-02-21 2802-04-21 18:48:18.5933838
+NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13
+NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35
+NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799
+NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558
+NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692
+NULL BEP 2141-02-19 2521-06-09 01:20:07.121
+NULL BEP 2206-08-10 2331-10-09 10:59:51
+NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094
+NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82
+NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647
+NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048
+NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738
+NULL FYW 1807-03-20 2305-08-17 01:32:44
+NULL GOYJHW 1959-04-27 NULL
+NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302
+NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784
+NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251
+NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888
+NULL IWEZJHKE NULL NULL
+NULL KL 1980-09-22 2073-08-25 11:51:10.318
+NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454
+NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055
+NULL LOTLS 2126-09-16 1977-12-15 15:28:56
+NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922
+NULL NULL 1865-11-08 2893-04-07 07:36:12
+NULL NULL 1915-02-22 2554-10-27 09:34:30
+NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801
+NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266
+NULL SDA 2196-04-12 2462-10-26 19:28:12.733
+NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287
+NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887
+NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59
+NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498
+NULL ZNOUDCR NULL 1988-04-23 08:40:21
+PXLD NULL NULL NULL
+PXLD NULL NULL NULL
+PXLD NULL NULL NULL
+QNCYBDW NULL NULL NULL
+UA NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkStringOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkStringOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: date), _col2 (type: timestamp)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: string)
+ 1 KEY.reducesinkkey0 (type: string)
+ Map Join Vectorization:
+ bigTableKeyExpressions: col 0:string
+ className: VectorMapJoinOperator
+ native: false
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+ nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_string_big_1a
+PREHOOK: Input: default@fullouter_string_small_1a_nonull
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_string_big_1a
+POSTHOOK: Input: default@fullouter_string_small_1a_nonull
+#### A masked pattern was here ####
+FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556
+MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592
+NULL 1985-01-22 2111-01-10 15:44:28
+NULL 2021-02-21 2802-04-21 18:48:18.5933838
+NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13
+NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35
+NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799
+NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558
+NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692
+NULL BEP 2141-02-19 2521-06-09 01:20:07.121
+NULL BEP 2206-08-10 2331-10-09 10:59:51
+NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094
+NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82
+NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647
+NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048
+NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738
+NULL FYW 1807-03-20 2305-08-17 01:32:44
+NULL GOYJHW 1959-04-27 NULL
+NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302
+NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784
+NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251
+NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888
+NULL IWEZJHKE NULL NULL
+NULL KL 1980-09-22 2073-08-25 11:51:10.318
+NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454
+NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055
+NULL LOTLS 2126-09-16 1977-12-15 15:28:56
+NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922
+NULL NULL NULL NULL
+NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266
+NULL SDA 2196-04-12 2462-10-26 19:28:12.733
+NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287
+NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887
+NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59
+NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498
+NULL ZNOUDCR NULL 1988-04-23 08:40:21
+PXLD NULL NULL NULL
+PXLD NULL NULL NULL
+PXLD NULL NULL NULL
+QNCYBDW NULL NULL NULL
+UA NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkStringOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkStringOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: date), _col2 (type: timestamp)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: string)
+ 1 KEY.reducesinkkey0 (type: string)
+ Map Join Vectorization:
+ bigTableKeyExpressions: col 0:string
+ className: VectorMapJoinOperator
+ native: false
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+ nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_string_big_1a_nonull
+PREHOOK: Input: default@fullouter_string_small_1a_nonull
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_string_big_1a_nonull
+POSTHOOK: Input: default@fullouter_string_small_1a_nonull
+#### A masked pattern was here ####
+FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556
+MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592
+NULL 1985-01-22 2111-01-10 15:44:28
+NULL 2021-02-21 2802-04-21 18:48:18.5933838
+NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13
+NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35
+NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799
+NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558
+NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692
+NULL BEP 2141-02-19 2521-06-09 01:20:07.121
+NULL BEP 2206-08-10 2331-10-09 10:59:51
+NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094
+NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82
+NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647
+NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048
+NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738
+NULL FYW 1807-03-20 2305-08-17 01:32:44
+NULL GOYJHW 1959-04-27 NULL
+NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302
+NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784
+NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251
+NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888
+NULL IWEZJHKE NULL NULL
+NULL KL 1980-09-22 2073-08-25 11:51:10.318
+NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454
+NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055
+NULL LOTLS 2126-09-16 1977-12-15 15:28:56
+NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922
+NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266
+NULL SDA 2196-04-12 2462-10-26 19:28:12.733
+NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287
+NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887
+NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59
+NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498
+NULL ZNOUDCR NULL 1988-04-23 08:40:21
+PXLD NULL NULL NULL
+PXLD NULL NULL NULL
+PXLD NULL NULL NULL
+QNCYBDW NULL NULL NULL
+UA NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key0 (type: smallint), key1 (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key0 (type: smallint), key1 (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int)
+ 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int)
+ Map Join Vectorization:
+ bigTableKeyExpressions: col 0:smallint, col 1:int
+ className: VectorMapJoinOperator
+ native: false
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+ nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint), _col1 (type: int)
+ sort order: ++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: smallint), _col3 (type: int)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_multikey_big_1a
+PREHOOK: Input: default@fullouter_multikey_small_1a
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_multikey_big_1a
+POSTHOOK: Input: default@fullouter_multikey_small_1a
+#### A masked pattern was here ####
+-17582 -1730236061 NULL NULL
+-17582 1082230084 NULL NULL
+-17582 267529350 -17582 267529350
+-17582 827141667 NULL NULL
+-17582 9637312 NULL NULL
+-18222 -1969080993 NULL NULL
+-6131 -1969080993 -6131 -1969080993
+1499 371855128 NULL NULL
+22767 -1969080993 NULL NULL
+3556 -1969080993 NULL NULL
+3556 NULL NULL NULL
+NULL 1082230084 NULL NULL
+NULL NULL -11868 -3536499
+NULL NULL -11868 -915441041
+NULL NULL -11868 1052120431
+NULL NULL -11868 1318114822
+NULL NULL -11868 1456809245
+NULL NULL -11868 1658440922
+NULL NULL -11868 930596435
+NULL NULL -11868 97203778
+NULL NULL -12252 1956403781
+NULL NULL -12252 964377504
+NULL NULL -15212 -2055239583
+NULL NULL -17788 -1361776766
+NULL NULL -17788 -738743861
+NULL NULL -17788 -872691214
+NULL NULL -17788 528419995
+NULL NULL -1787 -63842445
+NULL NULL -20125 -1995259010
+NULL NULL -20900 1078466156
+NULL NULL -22311 -2055239583
+NULL NULL -23457 -63842445
+NULL NULL -2407 1078466156
+NULL NULL -24206 -1456409156
+NULL NULL -24206 641361618
+NULL NULL -26894 -63842445
+NULL NULL -28129 -2055239583
+NULL NULL -28137 -63842445
+NULL NULL -28313 -706104224
+NULL NULL -28313 51228026
+NULL NULL -28313 837320573
+NULL NULL -4117 -1386947816
+NULL NULL -5734 1078466156
+NULL NULL -6061 -586336015
+NULL NULL -7386 -1635102480
+NULL NULL -7386 -2112062470
+NULL NULL -7386 100736776
+NULL NULL -980 -270600267
+NULL NULL -980 -333603940
+NULL NULL -980 -465544127
+NULL NULL -980 -801821285
+NULL NULL -980 1310479628
+NULL NULL -980 2009785365
+NULL NULL -980 356970043
+NULL NULL -980 628784462
+NULL NULL -980 712692345
+NULL NULL 11460 1078466156
+NULL NULL 12089 -63842445
+NULL NULL 13672 -63842445
+NULL NULL 14400 -825652334
+NULL NULL 15061 -63842445
+NULL NULL 15404 1078466156
+NULL NULL 16166 931172175
+NULL NULL 16696 -63842445
+NULL NULL 20156 -1618478138
+NULL NULL 20156 1165375499
+NULL NULL 20156 1855042153
+NULL NULL 20156 963883665
+NULL NULL 20969 -1995259010
+NULL NULL 21186 -586336015
+NULL NULL 22934 -1695419330
+NULL NULL 23015 -1893013623
+NULL NULL 23015 -217613200
+NULL NULL 23015 -252525791
+NULL NULL 23015 -276888585
+NULL NULL 23015 -696928205
+NULL NULL 23015 -893234501
+NULL NULL 23015 258882280
+NULL NULL 23015 564751472
+NULL NULL 26738 -2055239583
+NULL NULL 26944 -1995259010
+NULL NULL 30353 -1007182618
+NULL NULL 30353 -1011627089
+NULL NULL 30353 -1507157031
+NULL NULL 30353 105613996
+NULL NULL 30353 1241923267
+NULL NULL 30353 1364268303
+NULL NULL 30353 2044473567
+NULL NULL 31443 -1968665833
+NULL NULL 3412 -1196037018
+NULL NULL 3412 -1249487623
+NULL NULL 3412 -2081156563
+NULL NULL 3412 -2132472060
+NULL NULL 3412 1253976194
+NULL NULL 3890 1411429004
+NULL NULL 4586 -586336015
+NULL NULL 4779 -1995259010
+NULL NULL 4902 1078466156
+NULL NULL 5957 -1995259010
+NULL NULL 8177 -1995259010
+NULL NULL NULL 1082230084
+NULL NULL NULL NULL
+NULL NULL NULL NULL
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key0 (type: smallint), key1 (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key0 (type: smallint), key1 (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int)
+ 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int)
+ Map Join Vectorization:
+ bigTableKeyExpressions: col 0:smallint, col 1:int
+ className: VectorMapJoinOperator
+ native: false
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+ nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint), _col1 (type: int)
+ sort order: ++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: smallint), _col3 (type: int)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_multikey_big_1a_nonull
+PREHOOK: Input: default@fullouter_multikey_small_1a
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull
+POSTHOOK: Input: default@fullouter_multikey_small_1a
+#### A masked pattern was here ####
+-17582 -1730236061 NULL NULL
+-17582 1082230084 NULL NULL
+-17582 267529350 -17582 267529350
+-17582 827141667 NULL NULL
+-17582 9637312 NULL NULL
+-18222 -1969080993 NULL NULL
+-6131 -1969080993 -6131 -1969080993
+1499 371855128 NULL NULL
+22767 -1969080993 NULL NULL
+3556 -1969080993 NULL NULL
+NULL NULL -11868 -3536499
+NULL NULL -11868 -915441041
+NULL NULL -11868 1052120431
+NULL NULL -11868 1318114822
+NULL NULL -11868 1456809245
+NULL NULL -11868 1658440922
+NULL NULL -11868 930596435
+NULL NULL -11868 97203778
+NULL NULL -12252 1956403781
+NULL NULL -12252 964377504
+NULL NULL -15212 -2055239583
+NULL NULL -17788 -1361776766
+NULL NULL -17788 -738743861
+NULL NULL -17788 -872691214
+NULL NULL -17788 528419995
+NULL NULL -1787 -63842445
+NULL NULL -20125 -1995259010
+NULL NULL -20900 1078466156
+NULL NULL -22311 -2055239583
+NULL NULL -23457 -63842445
+NULL NULL -2407 1078466156
+NULL NULL -24206 -1456409156
+NULL NULL -24206 641361618
+NULL NULL -26894 -63842445
+NULL NULL -28129 -2055239583
+NULL NULL -28137 -63842445
+NULL NULL -28313 -706104224
+NULL NULL -28313 51228026
+NULL NULL -28313 837320573
+NULL NULL -4117 -1386947816
+NULL NULL -5734 1078466156
+NULL NULL -6061 -586336015
+NULL NULL -7386 -1635102480
+NULL NULL -7386 -2112062470
+NULL NULL -7386 100736776
+NULL NULL -980 -270600267
+NULL NULL -980 -333603940
+NULL NULL -980 -465544127
+NULL NULL -980 -801821285
+NULL NULL -980 1310479628
+NULL NULL -980 2009785365
+NULL NULL -980 356970043
+NULL NULL -980 628784462
+NULL NULL -980 712692345
+NULL NULL 11460 1078466156
+NULL NULL 12089 -63842445
+NULL NULL 13672 -63842445
+NULL NULL 14400 -825652334
+NULL NULL 15061 -63842445
+NULL NULL 15404 1078466156
+NULL NULL 16166 931172175
+NULL NULL 16696 -63842445
+NULL NULL 20156 -1618478138
+NULL NULL 20156 1165375499
+NULL NULL 20156 1855042153
+NULL NULL 20156 963883665
+NULL NULL 20969 -1995259010
+NULL NULL 21186 -586336015
+NULL NULL 22934 -1695419330
+NULL NULL 23015 -1893013623
+NULL NULL 23015 -217613200
+NULL NULL 23015 -252525791
+NULL NULL 23015 -276888585
+NULL NULL 23015 -696928205
+NULL NULL 23015 -893234501
+NULL NULL 23015 258882280
+NULL NULL 23015 564751472
+NULL NULL 26738 -2055239583
+NULL NULL 26944 -1995259010
+NULL NULL 30353 -1007182618
+NULL NULL 30353 -1011627089
+NULL NULL 30353 -1507157031
+NULL NULL 30353 105613996
+NULL NULL 30353 1241923267
+NULL NULL 30353 1364268303
+NULL NULL 30353 2044473567
+NULL NULL 31443 -1968665833
+NULL NULL 3412 -1196037018
+NULL NULL 3412 -1249487623
+NULL NULL 3412 -2081156563
+NULL NULL 3412 -2132472060
+NULL NULL 3412 1253976194
+NULL NULL 3890 1411429004
+NULL NULL 4586 -586336015
+NULL NULL 4779 -1995259010
+NULL NULL 4902 1078466156
+NULL NULL 5957 -1995259010
+NULL NULL 8177 -1995259010
+NULL NULL NULL 1082230084
+NULL NULL NULL NULL
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key0 (type: smallint), key1 (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key0 (type: smallint), key1 (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int)
+ 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int)
+ Map Join Vectorization:
+ bigTableKeyExpressions: col 0:smallint, col 1:int
+ className: VectorMapJoinOperator
+ native: false
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+ nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint), _col1 (type: int)
+ sort order: ++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: smallint), _col3 (type: int)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_multikey_big_1a
+PREHOOK: Input: default@fullouter_multikey_small_1a_nonull
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_multikey_big_1a
+POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull
+#### A masked pattern was here ####
+-17582 -1730236061 NULL NULL
+-17582 1082230084 NULL NULL
+-17582 267529350 -17582 267529350
+-17582 827141667 NULL NULL
+-17582 9637312 NULL NULL
+-18222 -1969080993 NULL NULL
+-6131 -1969080993 -6131 -1969080993
+1499 371855128 NULL NULL
+22767 -1969080993 NULL NULL
+3556 -1969080993 NULL NULL
+3556 NULL NULL NULL
+NULL 1082230084 NULL NULL
+NULL NULL -11868 -3536499
+NULL NULL -11868 -915441041
+NULL NULL -11868 1052120431
+NULL NULL -11868 1318114822
+NULL NULL -11868 1456809245
+NULL NULL -11868 1658440922
+NULL NULL -11868 930596435
+NULL NULL -11868 97203778
+NULL NULL -12252 1956403781
+NULL NULL -12252 964377504
+NULL NULL -15212 -2055239583
+NULL NULL -17788 -1361776766
+NULL NULL -17788 -738743861
+NULL NULL -17788 -872691214
+NULL NULL -17788 528419995
+NULL NULL -1787 -63842445
+NULL NULL -20125 -1995259010
+NULL NULL -20900 1078466156
+NULL NULL -22311 -2055239583
+NULL NULL -23457 -63842445
+NULL NULL -2407 1078466156
+NULL NULL -24206 -1456409156
+NULL NULL -24206 641361618
+NULL NULL -26894 -63842445
+NULL NULL -28129 -2055239583
+NULL NULL -28137 -63842445
+NULL NULL -28313 -706104224
+NULL NULL -28313 51228026
+NULL NULL -28313 837320573
+NULL NULL -4117 -1386947816
+NULL NULL -5734 1078466156
+NULL NULL -6061 -586336015
+NULL NULL -7386 -1635102480
+NULL NULL -7386 -2112062470
+NULL NULL -7386 100736776
+NULL NULL -980 -270600267
+NULL NULL -980 -333603940
+NULL NULL -980 -465544127
+NULL NULL -980 -801821285
+NULL NULL -980 1310479628
+NULL NULL -980 2009785365
+NULL NULL -980 356970043
+NULL NULL -980 628784462
+NULL NULL -980 712692345
+NULL NULL 11460 1078466156
+NULL NULL 12089 -63842445
+NULL NULL 13672 -63842445
+NULL NULL 14400 -825652334
+NULL NULL 15061 -63842445
+NULL NULL 15404 1078466156
+NULL NULL 16166 931172175
+NULL NULL 16696 -63842445
+NULL NULL 20156 -1618478138
+NULL NULL 20156 1165375499
+NULL NULL 20156 1855042153
+NULL NULL 20156 963883665
+NULL NULL 20969 -1995259010
+NULL NULL 21186 -586336015
+NULL NULL 22934 -1695419330
+NULL NULL 23015 -1893013623
+NULL NULL 23015 -217613200
+NULL NULL 23015 -252525791
+NULL NULL 23015 -276888585
+NULL NULL 23015 -696928205
+NULL NULL 23015 -893234501
+NULL NULL 23015 258882280
+NULL NULL 23015 564751472
+NULL NULL 26738 -2055239583
+NULL NULL 26944 -1995259010
+NULL NULL 30353 -1007182618
+NULL NULL 30353 -1011627089
+NULL NULL 30353 -1507157031
+NULL NULL 30353 105613996
+NULL NULL 30353 1241923267
+NULL NULL 30353 1364268303
+NULL NULL 30353 2044473567
+NULL NULL 31443 -1968665833
+NULL NULL 3412 -1196037018
+NULL NULL 3412 -1249487623
+NULL NULL 3412 -2081156563
+NULL NULL 3412 -2132472060
+NULL NULL 3412 1253976194
+NULL NULL 3890 1411429004
+NULL NULL 4586 -586336015
+NULL NULL 4779 -1995259010
+NULL NULL 4902 1078466156
+NULL NULL 5957 -1995259010
+NULL NULL 8177 -1995259010
+NULL NULL NULL NULL
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key0 (type: smallint), key1 (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key0 (type: smallint), key1 (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int)
+ 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int)
+ Map Join Vectorization:
+ bigTableKeyExpressions: col 0:smallint, col 1:int
+ className: VectorMapJoinOperator
+ native: false
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+ nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: smallint), _col1 (type: int)
+ sort order: ++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: smallint), _col3 (type: int)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_multikey_big_1a_nonull
+PREHOOK: Input: default@fullouter_multikey_small_1a_nonull
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull
+POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull
+#### A masked pattern was here ####
+-17582 -1730236061 NULL NULL
+-17582 1082230084 NULL NULL
+-17582 267529350 -17582 267529350
+-17582 827141667 NULL NULL
+-17582 9637312 NULL NULL
+-18222 -1969080993 NULL NULL
+-6131 -1969080993 -6131 -1969080993
+1499 371855128 NULL NULL
+22767 -1969080993 NULL NULL
+3556 -1969080993 NULL NULL
+NULL NULL -11868 -3536499
+NULL NULL -11868 -915441041
+NULL NULL -11868 1052120431
+NULL NULL -11868 1318114822
+NULL NULL -11868 1456809245
+NULL NULL -11868 1658440922
+NULL NULL -11868 930596435
+NULL NULL -11868 97203778
+NULL NULL -12252 1956403781
+NULL NULL -12252 964377504
+NULL NULL -15212 -2055239583
+NULL NULL -17788 -1361776766
+NULL NULL -17788 -738743861
+NULL NULL -17788 -872691214
+NULL NULL -17788 528419995
+NULL NULL -1787 -63842445
+NULL NULL -20125 -1995259010
+NULL NULL -20900 1078466156
+NULL NULL -22311 -2055239583
+NULL NULL -23457 -63842445
+NULL NULL -2407 1078466156
+NULL NULL -24206 -1456409156
+NULL NULL -24206 641361618
+NULL NULL -26894 -63842445
+NULL NULL -28129 -2055239583
+NULL NULL -28137 -63842445
+NULL NULL -28313 -706104224
+NULL NULL -28313 51228026
+NULL NULL -28313 837320573
+NULL NULL -4117 -1386947816
+NULL NULL -5734 1078466156
+NULL NULL -6061 -586336015
+NULL NULL -7386 -1635102480
+NULL NULL -7386 -2112062470
+NULL NULL -7386 100736776
+NULL NULL -980 -270600267
+NULL NULL -980 -333603940
+NULL NULL -980 -465544127
+NULL NULL -980 -801821285
+NULL NULL -980 1310479628
+NULL NULL -980 2009785365
+NULL NULL -980 356970043
+NULL NULL -980 628784462
+NULL NULL -980 712692345
+NULL NULL 11460 1078466156
+NULL NULL 12089 -63842445
+NULL NULL 13672 -63842445
+NULL NULL 14400 -825652334
+NULL NULL 15061 -63842445
+NULL NULL 15404 1078466156
+NULL NULL 16166 931172175
+NULL NULL 16696 -63842445
+NULL NULL 20156 -1618478138
+NULL NULL 20156 1165375499
+NULL NULL 20156 1855042153
+NULL NULL 20156 963883665
+NULL NULL 20969 -1995259010
+NULL NULL 21186 -586336015
+NULL NULL 22934 -1695419330
+NULL NULL 23015 -1893013623
+NULL NULL 23015 -217613200
+NULL NULL 23015 -252525791
+NULL NULL 23015 -276888585
+NULL NULL 23015 -696928205
+NULL NULL 23015 -893234501
+NULL NULL 23015 258882280
+NULL NULL 23015 564751472
+NULL NULL 26738 -2055239583
+NULL NULL 26944 -1995259010
+NULL NULL 30353 -1007182618
+NULL NULL 30353 -1011627089
+NULL NULL 30353 -1507157031
+NULL NULL 30353 105613996
+NULL NULL 30353 1241923267
+NULL NULL 30353 1364268303
+NULL NULL 30353 2044473567
+NULL NULL 31443 -1968665833
+NULL NULL 3412 -1196037018
+NULL NULL 3412 -1249487623
+NULL NULL 3412 -2081156563
+NULL NULL 3412 -2132472060
+NULL NULL 3412 1253976194
+NULL NULL 3890 1411429004
+NULL NULL 4586 -586336015
+NULL NULL 4779 -1995259010
+NULL NULL 4902 1078466156
+NULL NULL 5957 -1995259010
+NULL NULL 8177 -1995259010
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2
+order by b.key0, b.key1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2
+order by b.key0, b.key1
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: decimal(38,18))
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string)
+ 1 KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string)
+ Map Join Vectorization:
+ bigTableKeyExpressions: col 0:timestamp, col 1:smallint, col 2:string
+ className: VectorMapJoinOperator
+ native: false
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+ nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: timestamp), _col1 (type: smallint)
+ sort order: ++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18))
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 129 Data size:
31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 
MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 
13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 
-8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out index 9a2f5d8..c7085bd 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out @@ -84,10 +84,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -243,10 +242,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -402,10 +400,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -555,10 +552,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS 
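The expected results above exercise the FULL OUTER MapJoin path end to end: matched keys join normally, unmatched rows from either input surface with NULLs on the opposite side, and the multi-key variant mixes timestamp, smallint, and string keys with a decimal payload column. As a rough sketch, a .q-style script along the following lines would drive golden output of this shape; the query is taken verbatim from the test above, while the SET lines are assumptions based on the hive.mapjoin.full.outer property this patch introduces:

    -- Sketch only: the SET keys are assumed from this patch's new HiveConf
    -- entries; the query itself is the one whose plan and results appear above.
    SET hive.vectorized.execution.enabled=true;
    SET hive.mapjoin.full.outer=true;

    EXPLAIN VECTORIZATION OPERATOR
    SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal
    FROM fullouter_multikey_big_1b b
    FULL OUTER JOIN fullouter_multikey_small_1b s
      ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2
    ORDER BY b.key0, b.key1;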
true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -708,10 +704,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -868,10 +863,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out index 6005fb2..8f4874c 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out @@ -87,11 +87,11 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [4] - valueColumnNums: [3] + partitionColumns: 4:double + valueColumns: 3:bigint Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -147,11 +147,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [3] + partitionColumns: 0:int, 1:int + valueColumns: 3:bigint Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 @@ -289,11 +289,11 @@ STAGE PLANS: 
Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [4] - valueColumnNums: [3] + partitionColumns: 4:double + valueColumns: 3:bigint Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -349,11 +349,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [3] + partitionColumns: 0:int, 1:int + valueColumns: 3:bigint Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 @@ -501,11 +501,10 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [3] - valueColumnNums: [] + partitionColumns: 3:double Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -558,11 +557,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [] + partitionColumns: 0:int, 1:int Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap @@ -620,11 +618,11 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [1] + partitionColumns: 2:double + valueColumns: 1:bigint Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 
(type: bigint) Reducer 4 @@ -663,10 +661,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 5 @@ -808,11 +806,10 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [3] - valueColumnNums: [] + partitionColumns: 3:double Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -865,11 +862,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [] + partitionColumns: 0:int, 1:int Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap @@ -927,11 +923,11 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [1] + partitionColumns: 2:double + valueColumns: 1:bigint Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 4 @@ -970,10 +966,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 5 @@ -1111,11 +1107,10 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: 
VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [3] - valueColumnNums: [] + partitionColumns: 3:double Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1168,11 +1163,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [] + partitionColumns: 0:int, 1:int Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) @@ -1180,11 +1174,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [] + partitionColumns: 0:int, 1:int Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap @@ -1228,10 +1221,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE Reducer 4 Execution mode: llap @@ -1251,6 +1243,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 5 Execution mode: vectorized, llap Reduce Vectorization: @@ -1293,10 +1288,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] 
Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -1439,11 +1433,10 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [3] - valueColumnNums: [] + partitionColumns: 3:double Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1496,11 +1489,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [] + partitionColumns: 0:int, 1:int Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) @@ -1508,11 +1500,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [] + partitionColumns: 0:int, 1:int Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap @@ -1556,10 +1547,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE Reducer 4 Execution mode: llap @@ -1579,6 +1569,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 5 Execution mode: vectorized, llap Reduce Vectorization: @@ -1621,10 +1614,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:bigint native: true nativeConditionsMet: 
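Aside from the new MergeJoin Vectorization note (enabled: false where merge-join vectorization is unsupported), the churn in these grouping q.out files is a pure display change: the Reduce Sink vectorization detail now prints typed column lists (keyColumns: 0:int, 1:int, 2:bigint, and likewise valueColumns / partitionColumns) in place of bare index arrays (keyColumnNums: [0, 1, 2]), and empty valueColumnNums: [] lines are dropped. A minimal sketch of a grouping-sets query of the shape these files cover, using hypothetical table and column names, that would surface the reworked format under EXPLAIN:

    -- Sketch only: T1(key int, value int) is a stand-in for the actual test
    -- tables; any GROUPING SETS aggregation shows the new key/value column
    -- display in its Reduce Sink Vectorization section.
    SET hive.vectorized.execution.enabled=true;

    EXPLAIN VECTORIZATION OPERATOR
    SELECT key, value, GROUPING__ID, count(*)
    FROM T1
    GROUP BY key, value GROUPING SETS ((key, value), (key), ());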
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -1760,10 +1752,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -1926,10 +1918,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2004,10 +1995,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 3 @@ -2144,10 +2135,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) @@ -2155,10 +2145,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, 
No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2219,10 +2208,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: llap @@ -2242,6 +2230,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -2284,10 +2275,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out index d8e6b3f..0d5dd91 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out @@ -101,11 +101,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), 1L (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 4] + keyColumns: 0:int, 1:int, 4:bigint keyExpressions: ConstantVectorExpression(val 1) -> 4:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -270,10 +270,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num 
rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out index 3586eae..198c826 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out @@ -102,10 +102,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -264,10 +264,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -426,10 +426,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -588,10 +588,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -744,10 +744,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: 
string), _col2 (type: string), _col3 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:string, 1:string, 2:string, 3:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 18 Data size: 9936 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -900,10 +899,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1041,10 +1039,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out index b072ffc..3b1d8af 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out @@ -88,10 +88,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:bigint Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized, llap @@ -147,10 +147,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 
3:bigint Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 @@ -269,10 +269,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:bigint Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized, llap @@ -328,10 +328,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 @@ -503,10 +503,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:double Statistics: Num rows: 24 Data size: 13248 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: double) Reducer 3 @@ -673,10 +673,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:bigint Statistics: Num rows: 3 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: vectorized, llap @@ -732,10 +732,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + 
valueColumns: 3:bigint
 Statistics: Num rows: 12 Data size: 2232 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col3 (type: bigint)
 Reducer 3
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out
index 80ecd59..a0303e0 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out
@@ -366,10 +366,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [3, 4]
+valueColumns: 3:struct, 4:bigint
 Statistics: Num rows: 48 Data size: 26496 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col3 (type: struct), _col4 (type: bigint)
 Reducer 3
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out
index dabc987..aa19a38 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out
@@ -95,10 +95,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [3, 4]
+valueColumns: 3:struct, 4:bigint
 Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col3 (type: struct), _col4 (type: bigint)
 Execution mode: vectorized, llap
@@ -233,10 +233,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [3, 4]
+valueColumns: 3:struct, 4:bigint
 Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col3 (type: struct), _col4 (type: bigint)
 Execution mode: vectorized, llap
@@ -397,10 +397,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1]
+keyColumns: 0:string, 1:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [2, 3]
+valueColumns: 2:struct, 3:bigint
 Statistics: Num rows: 12 Data size: 5760 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col2 (type: struct), _col3 (type: bigint)
 Execution mode: vectorized, llap
@@ -456,10 +456,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [3, 4]
+valueColumns: 3:struct, 4:bigint
 Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col3 (type: struct), _col4 (type: bigint)
 Reducer 3
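The hunks above and below all make one mechanical change: the vectorized EXPLAIN detail switches from bare column indices (keyColumnNums: [0, 1, 2], valueColumnNums: [3, 4]) to index:type pairs (keyColumns: 0:string, 1:string, 2:bigint). A minimal sketch of that rendering follows; ColumnRefFormatter and its method are hypothetical names for illustration, not Hive's actual implementation.

    import java.util.List;
    import java.util.StringJoiner;

    // Hypothetical sketch: render column references in the new "index:type"
    // style, e.g. "0:string, 1:string, 2:bigint" instead of "[0, 1, 2]".
    public class ColumnRefFormatter {
        public static String format(int[] columnNums, List<String> typeNames) {
            StringJoiner joiner = new StringJoiner(", ");
            for (int colNum : columnNums) {
                joiner.add(colNum + ":" + typeNames.get(colNum));
            }
            return joiner.toString();
        }

        public static void main(String[] args) {
            // Prints "0:string, 1:string, 2:bigint"
            System.out.println(format(new int[] {0, 1, 2},
                List.of("string", "string", "bigint")));
        }
    }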
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out
index b896193..47348f1 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out
@@ -96,10 +96,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [3]
+valueColumns: 3:bigint
 Statistics: Num rows: 8 Data size: 2944 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col3 (type: bigint)
 Reduce Output Operator
@@ -108,10 +108,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [3]
+valueColumns: 3:bigint
 Statistics: Num rows: 8 Data size: 2944 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col3 (type: bigint)
 Execution mode: vectorized, llap
@@ -183,10 +183,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkStringOperator
-keyColumnNums: [0]
+keyColumns: 0:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [1, 2]
+valueColumns: 1:string, 2:bigint
 Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col1 (type: string), _col2 (type: bigint)
 Reducer 3
@@ -207,6 +207,9 @@ STAGE PLANS:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+MergeJoin Vectorization:
+enabled: false
+enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 4
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -259,10 +262,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkStringOperator
-keyColumnNums: [0]
+keyColumns: 0:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [1, 2]
+valueColumns: 1:string, 2:bigint
 Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col1 (type: string), _col2 (type: bigint)
@@ -340,10 +343,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [3]
+valueColumns: 3:bigint
 Statistics: Num rows: 8 Data size: 2944 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col3 (type: bigint)
 Reduce Output Operator
@@ -352,10 +355,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [3]
+valueColumns: 3:bigint
 Statistics: Num rows: 8 Data size: 2944 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col3 (type: bigint)
 Execution mode: vectorized, llap
@@ -427,10 +430,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkStringOperator
-keyColumnNums: [0]
+keyColumns: 0:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [1, 2]
+valueColumns: 1:string, 2:bigint
 Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col1 (type: string), _col2 (type: bigint)
 Reducer 3
@@ -451,6 +454,9 @@ STAGE PLANS:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+MergeJoin Vectorization:
+enabled: false
+enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 4
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -503,10 +509,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkStringOperator
-keyColumnNums: [0]
+keyColumns: 0:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [1, 2]
+valueColumns: 1:string, 2:bigint
 Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col1 (type: string), _col2 (type: bigint)
@@ -615,10 +621,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1]
+keyColumns: 0:string, 1:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [2]
+valueColumns: 2:bigint
 Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col2 (type: bigint)
 Execution mode: vectorized, llap
@@ -674,10 +680,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [3]
+valueColumns: 3:bigint
 Statistics: Num rows: 8 Data size: 2944 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col3 (type: bigint)
 Reduce Output Operator
@@ -686,10 +692,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [3]
+valueColumns: 3:bigint
 Statistics: Num rows: 8 Data size: 2944 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col3 (type: bigint)
 Reducer 3
@@ -744,10 +750,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkStringOperator
-keyColumnNums: [0]
+keyColumns: 0:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [1, 2]
+valueColumns: 1:string, 2:bigint
 Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col1 (type: string), _col2 (type: bigint)
 Reducer 4
@@ -768,6 +774,9 @@ STAGE PLANS:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+MergeJoin Vectorization:
+enabled: false
+enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 5
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -820,10 +829,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkStringOperator
-keyColumnNums: [0]
+keyColumns: 0:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [1, 2]
+valueColumns: 1:string, 2:bigint
 Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col1 (type: string), _col2 (type: bigint)
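The vector_groupby_grouping_sets4 hunks above also annotate reducers that stay in row mode: a MergeJoin Vectorization block reports enabled: false together with the failed precondition (Vectorizing MergeJoin Supported IS false). Below is a sketch of the met/not-met condition bookkeeping that this output format suggests; the class is hypothetical, not Hive's code.

    import java.util.ArrayList;
    import java.util.List;

    // Hypothetical sketch of condition reporting: each precondition is
    // recorded with its outcome, and the failures surface in EXPLAIN as
    // "enableConditionsNotMet: Vectorizing MergeJoin Supported IS false".
    public class VectorizationConditions {
        private final List<String> met = new ArrayList<>();
        private final List<String> notMet = new ArrayList<>();

        public void record(String condition, boolean outcome) {
            (outcome ? met : notMet).add(condition + " IS " + outcome);
        }

        public boolean enabled() {
            return notMet.isEmpty();
        }

        public String describe() {
            StringBuilder sb = new StringBuilder("enabled: " + enabled());
            if (!notMet.isEmpty()) {
                sb.append("\nenableConditionsNotMet: ").append(String.join(", ", notMet));
            }
            return sb.toString();
        }

        public static void main(String[] args) {
            VectorizationConditions mergeJoin = new VectorizationConditions();
            mergeJoin.record("Vectorizing MergeJoin Supported", false);
            System.out.println(mergeJoin.describe());
        }
    }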
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out
index 8da5735..7eca699 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out
@@ -88,10 +88,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1]
+keyColumns: 0:string, 1:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -158,10 +157,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [3]
+valueColumns: 3:bigint
 Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col3 (type: bigint)
 Reducer 3
@@ -280,10 +279,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1]
+keyColumns: 0:string, 1:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -350,10 +348,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [3]
+valueColumns: 3:bigint
 Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col3 (type: bigint)
 Reducer 3
@@ -499,10 +497,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1]
+keyColumns: 0:string, 1:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -569,10 +566,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1]
+keyColumns: 0:string, 1:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [2]
+valueColumns: 2:bigint
 Statistics: Num rows: 3 Data size: 1104 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col2 (type: bigint)
 Reducer 3
@@ -611,10 +608,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [3]
+valueColumns: 3:bigint
 Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col3 (type: bigint)
 Reducer 4
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out
index 6c4ae65..7323752 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out
@@ -88,10 +88,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -231,10 +230,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
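A second cleanup shows up in the grouping-sets5/6 hunks above: lines such as valueColumnNums: [] are deleted with no replacement, so the new output simply omits empty column lists rather than printing []. A sketch of that omit-when-empty behavior, again under hypothetical names:

    // Hypothetical sketch: append an EXPLAIN field only when it carries
    // information, matching the removal of bare "valueColumnNums: []" lines.
    public class ExplainField {
        public static void appendIfNonEmpty(StringBuilder out, String label, String rendered) {
            // Skip the field entirely when there is nothing to report.
            if (rendered != null && !rendered.isEmpty()) {
                out.append(label).append(": ").append(rendered).append('\n');
            }
        }

        public static void main(String[] args) {
            StringBuilder plan = new StringBuilder();
            appendIfNonEmpty(plan, "keyColumns", "0:string, 1:string");
            appendIfNonEmpty(plan, "valueColumns", "");  // omitted, not printed as []
            System.out.print(plan);                      // only the keyColumns line
        }
    }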
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out
index 7f7624a..c10b455 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out
@@ -88,10 +88,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:int, 1:int, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -249,10 +248,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:int, 1:int, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -417,10 +415,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:int, 1:int, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -586,10 +583,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:int, 1:int, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -657,10 +653,10 @@ STAGE PLANS:
 sort order: -+
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
-keyColumnNums: [5, 4]
+keyColumns: 5:bigint, 4:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [0, 1]
+valueColumns: 0:int, 1:int
 Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: int), _col1 (type: int)
 Reducer 3
@@ -792,10 +788,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:int, 1:int, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -953,10 +948,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:int, 1:int, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -1128,10 +1122,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:int, 1:int, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -1290,10 +1283,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:int, 1:int, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -1354,11 +1346,11 @@ STAGE PLANS:
 sort order: -+
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
-keyColumnNums: [5, 4]
+keyColumns: 5:bigint, 4:int
 keyExpressions: IfExprColumnNull(col 3:boolean, col 0:int, null)(children: LongColEqualLongScalar(col 5:bigint, val 1) -> 3:boolean, col 0:int) -> 4:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [0, 1]
+valueColumns: 0:int, 1:int
 Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: int), _col1 (type: int)
 Reducer 3
@@ -1490,10 +1482,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:int, 1:int, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -1656,10 +1647,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:int, 1:int, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -1822,10 +1812,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:int, 1:int, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -1983,10 +1972,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:int, 1:int, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
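The @@ -1354,11 hunk above keeps a keyExpressions annotation that renders a vectorized expression tree on one line: each node prints its class, its argument columns, and the column it writes, as in IfExprColumnNull(col 3:boolean, col 0:int, null)(children: LongColEqualLongScalar(col 5:bigint, val 1) -> 3:boolean, col 0:int) -> 4:int. A simplified sketch of producing such a rendering; the node type below is invented for illustration and models only expression children, not column references.

    import java.util.List;
    import java.util.stream.Collectors;

    // Hypothetical sketch of the one-line expression rendering used by the
    // "keyExpressions:" annotation: name(args)(children: ... -> outCol) -> outCol.
    public class VectorExprNode {
        final String name;
        final String args;
        final String outputCol;
        final List<VectorExprNode> children;

        VectorExprNode(String name, String args, String outputCol, List<VectorExprNode> children) {
            this.name = name;
            this.args = args;
            this.outputCol = outputCol;
            this.children = children;
        }

        String render() {
            String self = name + "(" + args + ")";
            if (!children.isEmpty()) {
                self += "(children: " + children.stream()
                    .map(c -> c.render() + " -> " + c.outputCol)
                    .collect(Collectors.joining(", ")) + ")";
            }
            return self;
        }

        public static void main(String[] args) {
            VectorExprNode eq = new VectorExprNode(
                "LongColEqualLongScalar", "col 5:bigint, val 1", "3:boolean", List.of());
            VectorExprNode top = new VectorExprNode(
                "IfExprColumnNull", "col 3:boolean, col 0:int, null", "4:int", List.of(eq));
            System.out.println(top.render() + " -> " + top.outputCol);
        }
    }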
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out
index e67bca7..afa13ab 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out
@@ -88,10 +88,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [3]
+valueColumns: 3:bigint
 Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE
 TopN Hash Memory Usage: 0.1
 value expressions: _col3 (type: bigint)
@@ -156,10 +156,10 @@ STAGE PLANS:
 sort order: ++
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
-keyColumnNums: [0, 1]
+keyColumns: 0:string, 1:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [2]
+valueColumns: 2:bigint
 Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE
 TopN Hash Memory Usage: 0.1
 value expressions: _col2 (type: bigint)
@@ -289,10 +289,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [3]
+valueColumns: 3:bigint
 Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE
 TopN Hash Memory Usage: 0.1
 value expressions: _col3 (type: bigint)
@@ -357,10 +357,10 @@ STAGE PLANS:
 sort order: ++
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
-keyColumnNums: [0, 1]
+keyColumns: 0:string, 1:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [2]
+valueColumns: 2:bigint
 Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE
 TopN Hash Memory Usage: 0.1
 value expressions: _col2 (type: bigint)
@@ -490,10 +490,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [3]
+valueColumns: 3:bigint
 Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE
 TopN Hash Memory Usage: 0.1
 value expressions: _col3 (type: bigint)
@@ -558,10 +558,10 @@ STAGE PLANS:
 sort order: ++
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
-keyColumnNums: [0, 1]
+keyColumns: 0:string, 1:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [2]
+valueColumns: 2:bigint
 Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
 TopN Hash Memory Usage: 0.1
 value expressions: _col2 (type: bigint)
@@ -689,10 +689,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2, 3]
+keyColumns: 0:string, 1:string, 2:string, 3:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 18 Data size: 9936 Basic stats: COMPLETE Column stats: NONE
 TopN Hash Memory Usage: 0.1
 Execution mode: vectorized, llap
@@ -754,10 +753,9 @@ STAGE PLANS:
 sort order: +
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
-keyColumnNums: [0]
+keyColumns: 0:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 9 Data size: 4968 Basic stats: COMPLETE Column stats: NONE
 TopN Hash Memory Usage: 0.1
 Reducer 3
@@ -884,10 +882,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkStringOperator
-keyColumnNums: [0]
+keyColumns: 0:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE
 TopN Hash Memory Usage: 0.1
 Execution mode: vectorized, llap
@@ -940,10 +937,9 @@ STAGE PLANS:
 sort order: +
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
-keyColumnNums: [0]
+keyColumns: 0:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE
 TopN Hash Memory Usage: 0.1
 Reducer 3
@@ -1068,10 +1064,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: double)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0]
+keyColumns: 0:double
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [1]
+valueColumns: 1:bigint
 Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
 TopN Hash Memory Usage: 0.1
 value expressions: _col1 (type: bigint)
@@ -1127,10 +1123,10 @@ STAGE PLANS:
 sort order: +
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
-keyColumnNums: [0]
+keyColumns: 0:double
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [1]
+valueColumns: 1:bigint
 Statistics: Num rows: 3 Data size: 1104 Basic stats: COMPLETE Column stats: NONE
 TopN Hash Memory Usage: 0.1
 value expressions: _col1 (type: bigint)
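Across the files above, the reduce sink class tracks the key shape: VectorReduceSinkLongOperator or VectorReduceSinkStringOperator for a single long or string key, VectorReduceSinkMultiKeyOperator for composite keys, and VectorReduceSinkObjectHashOperator where ordering or partition expressions are involved. The dispatch below is a guess for illustration only; Hive's real selection logic has more inputs.

    // Hypothetical sketch of choosing a specialized reduce sink class from
    // the key types; illustrative only, not Hive's actual rule.
    public class ReduceSinkClassChooser {
        enum SinkClass { LONG, STRING, MULTI_KEY, OBJECT_HASH }

        public static SinkClass choose(String[] keyTypes, boolean hasPartitionExpressions) {
            if (hasPartitionExpressions) {
                return SinkClass.OBJECT_HASH;  // e.g. partitioned by rand()
            }
            if (keyTypes.length == 1
                    && (keyTypes[0].equals("bigint") || keyTypes[0].equals("int"))) {
                return SinkClass.LONG;
            }
            if (keyTypes.length == 1 && keyTypes[0].equals("string")) {
                return SinkClass.STRING;
            }
            return SinkClass.MULTI_KEY;
        }

        public static void main(String[] args) {
            System.out.println(choose(new String[] {"string", "string", "bigint"}, false)); // MULTI_KEY
            System.out.println(choose(new String[] {"int"}, false));                        // LONG
        }
    }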
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out
index dc3363d..8c5c1da 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out
@@ -86,10 +86,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: int), _col1 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1]
+keyColumns: 0:int, 1:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [2, 3]
+valueColumns: 2:int, 3:int
 Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col2 (type: int), _col3 (type: int)
 Execution mode: vectorized, llap
@@ -153,11 +153,11 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
-keyColumnNums: [0, 2]
+keyColumns: 0:int, 2:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-partitionColumnNums: [0]
-valueColumnNums: [1]
+partitionColumns: 0:int
+valueColumns: 1:int
 Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col2 (type: int)
 Reducer 3
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
index 98e6e54..21cc5dc 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
@@ -168,6 +168,9 @@ STAGE PLANS:
 sort order: +
 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: string)
+MergeJoin Vectorization:
+enabled: false
+enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out
index e839214..3f8bc33 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out
@@ -84,10 +84,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [3]
+valueColumns: 3:bigint
 Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col3 (type: bigint)
 Execution mode: vectorized, llap
@@ -368,11 +368,11 @@ STAGE PLANS:
 Map-reduce partition columns: rand() (type: double)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-partitionColumnNums: [4]
-valueColumnNums: [3]
+partitionColumns: 4:double
+valueColumns: 3:bigint
 Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col3 (type: bigint)
 Execution mode: vectorized, llap
@@ -428,11 +428,11 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-partitionColumnNums: [0, 1]
-valueColumnNums: [3]
+partitionColumns: 0:string, 1:string
+valueColumns: 3:bigint
 Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col3 (type: bigint)
 Reducer 3
@@ -743,11 +743,11 @@ STAGE PLANS:
 Map-reduce partition columns: rand() (type: double)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-partitionColumnNums: [4]
-valueColumnNums: [3]
+partitionColumns: 4:double
+valueColumns: 3:bigint
 Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col3 (type: bigint)
 Select Operator
@@ -778,11 +778,11 @@ STAGE PLANS:
 Map-reduce partition columns: rand() (type: double)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-partitionColumnNums: [4]
-valueColumnNums: [3]
+partitionColumns: 4:double
+valueColumns: 3:bigint
 Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col3 (type: bigint)
 Execution mode: vectorized, llap
@@ -838,11 +838,11 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-partitionColumnNums: [0, 1]
-valueColumnNums: [3]
+partitionColumns: 0:string, 1:string
+valueColumns: 3:bigint
 Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col3 (type: bigint)
 Reducer 3
@@ -931,11 +931,11 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-partitionColumnNums: [0, 1]
-valueColumnNums: [3]
+partitionColumns: 0:string, 1:string
+valueColumns: 3:bigint
 Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col3 (type: bigint)
 Reducer 5
diff --git ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out
index 4a3bc02..294faf7 100644
--- ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out
+++ ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out
@@ -269,6 +269,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+MergeJoin Vectorization:
+enabled: false
+enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
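In the rollup1 hunks above, partitionColumns: 4:double points one slot past the plan's data columns (keys 0:string, 1:string, 2:bigint plus value 3:bigint) because the rand() partition expression is evaluated into a freshly allocated scratch column. A sketch of that allocation idea, with hypothetical names:

    import java.util.ArrayList;
    import java.util.List;

    // Hypothetical sketch: expression results such as rand() get scratch
    // columns appended after the data columns, which is why a partition
    // column can be reported as "4:double" in a four-column plan.
    public class ScratchColumnAllocator {
        private final List<String> columnTypes = new ArrayList<>();

        public ScratchColumnAllocator(List<String> dataColumnTypes) {
            columnTypes.addAll(dataColumnTypes);
        }

        // Returns the index of a newly allocated scratch column.
        public int allocate(String typeName) {
            columnTypes.add(typeName);
            return columnTypes.size() - 1;
        }

        public String describe(int colNum) {
            return colNum + ":" + columnTypes.get(colNum);
        }

        public static void main(String[] args) {
            ScratchColumnAllocator alloc = new ScratchColumnAllocator(
                List.of("string", "string", "bigint", "bigint"));
            int randCol = alloc.allocate("double");
            System.out.println("partitionColumns: " + alloc.describe(randCol)); // 4:double
        }
    }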
diff --git ql/src/test/results/clientpositive/llap/vector_inner_join.q.out ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
index bb555df..59ed1ce 100644
--- ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
+++ ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
@@ -84,12 +84,14 @@ STAGE PLANS:
 0 _col0 (type: int)
 1 _col0 (type: int)
 Map Join Vectorization:
-bigTableKeyColumnNums: [0]
-bigTableRetainedColumnNums: [0]
+bigTableKeyColumns: 0:int
+bigTableRetainColumnNums: []
 className: VectorMapJoinInnerBigOnlyLongOperator
 native: true
 nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-projectedOutputColumnNums: [0]
+nonOuterSmallTableKeyMapping: [0]
+projectedOutput: 0:int
+hashTableImplementationType: OPTIMIZED
 outputColumnNames: _col1
 input vertices:
 1 Map 2
@@ -158,10 +160,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkLongOperator
-keyColumnNums: [0]
+keyColumns: 0:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -250,13 +251,15 @@ STAGE PLANS:
 0 _col0 (type: int)
 1 _col0 (type: int)
 Map Join Vectorization:
-bigTableKeyColumnNums: [0]
-bigTableRetainedColumnNums: [0]
-bigTableValueColumnNums: [0]
+bigTableKeyColumns: 0:int
+bigTableRetainColumnNums: [0]
+bigTableValueColumns: 0:int
 className: VectorMapJoinLeftSemiLongOperator
 native: true
 nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-projectedOutputColumnNums: [0]
+nonOuterSmallTableKeyMapping: []
+projectedOutput: 0:int
+hashTableImplementationType: OPTIMIZED
 outputColumnNames: _col0
 input vertices:
 1 Map 2
@@ -329,10 +332,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkLongOperator
-keyColumnNums: [0]
+keyColumns: 0:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -457,13 +459,15 @@ STAGE PLANS:
 0 _col0 (type: int)
 1 _col1 (type: int)
 Map Join Vectorization:
-bigTableKeyColumnNums: [0]
-bigTableRetainedColumnNums: [0]
+bigTableKeyColumns: 0:int
+bigTableRetainColumnNums: []
 className: VectorMapJoinInnerLongOperator
 native: true
 nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-projectedOutputColumnNums: [3, 0]
-smallTableMapping: [3]
+nonOuterSmallTableKeyMapping: [0]
+projectedOutput: 3:string, 0:int
+smallTableValueMapping: 3:string
+hashTableImplementationType: OPTIMIZED
 outputColumnNames: _col1, _col2
 input vertices:
 1 Map 2
@@ -532,10 +536,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col1 (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkLongOperator
-keyColumnNums: [1]
+keyColumns: 1:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [0]
+valueColumns: 0:string
 Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: string)
 Execution mode: vectorized, llap
@@ -624,10 +628,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col1 (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkLongOperator
-keyColumnNums: [1]
+keyColumns: 1:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [0]
+valueColumns: 0:string
 Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: string)
 Execution mode: vectorized, llap
@@ -677,14 +681,16 @@ STAGE PLANS:
 0 _col1 (type: int)
 1 _col0 (type: int)
 Map Join Vectorization:
-bigTableKeyColumnNums: [0]
-bigTableRetainedColumnNums: [0, 1]
-bigTableValueColumnNums: [0, 1]
+bigTableKeyColumns: 0:int
+bigTableRetainColumnNums: [0, 1]
+bigTableValueColumns: 0:int, 1:string
 className: VectorMapJoinInnerLongOperator
 native: true
 nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-projectedOutputColumnNums: [3, 0, 0, 1]
-smallTableMapping: [3]
+nonOuterSmallTableKeyMapping: []
+projectedOutput: 3:string, 0:int, 0:int, 1:string
+smallTableValueMapping: 3:string
+hashTableImplementationType: OPTIMIZED
 outputColumnNames: _col0, _col1, _col2, _col3
 input vertices:
 0 Map 1
@@ -786,14 +792,16 @@ STAGE PLANS:
 0 _col0 (type: int)
 1 _col1 (type: int)
 Map Join Vectorization:
-bigTableKeyColumnNums: [0]
-bigTableRetainedColumnNums: [0, 1]
-bigTableValueColumnNums: [0, 1]
+bigTableKeyColumns: 0:int
+bigTableRetainColumnNums: [0, 1]
+bigTableValueColumns: 0:int, 1:string
 className: VectorMapJoinInnerLongOperator
 native: true
 nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-projectedOutputColumnNums: [0, 1, 3, 0]
-smallTableMapping: [3]
+nonOuterSmallTableKeyMapping: []
+projectedOutput: 0:int, 1:string, 3:string, 0:int
+smallTableValueMapping: 3:string
+hashTableImplementationType: OPTIMIZED
 outputColumnNames: _col0, _col1, _col2, _col3
 input vertices:
 1 Map 2
@@ -863,10 +871,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col1 (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkLongOperator
-keyColumnNums: [1]
+keyColumns: 1:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [0]
+valueColumns: 0:string
 Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: string)
 Execution mode: vectorized, llap
@@ -956,14 +964,16 @@ STAGE PLANS:
 0 _col0 (type: int)
 1 _col1 (type: int)
 Map Join Vectorization:
-bigTableKeyColumnNums: [0]
-bigTableRetainedColumnNums: [0, 1]
-bigTableValueColumnNums: [0, 1]
+bigTableKeyColumns: 0:int
+bigTableRetainColumnNums: [0, 1]
+bigTableValueColumns: 0:int, 1:string
 className: VectorMapJoinInnerLongOperator
 native: true
 nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-projectedOutputColumnNums: [0, 1, 3]
-smallTableMapping: [3]
+nonOuterSmallTableKeyMapping: []
+projectedOutput: 0:int, 1:string, 3:string
+smallTableValueMapping: 3:string
+hashTableImplementationType: OPTIMIZED
 outputColumnNames: _col0, _col1, _col2
 input vertices:
 1 Map 2
@@ -1032,10 +1042,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col1 (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkLongOperator
-keyColumnNums: [1]
+keyColumns: 1:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [0]
+valueColumns: 0:string
 Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: string)
 Execution mode: vectorized, llap
@@ -1125,14 +1135,16 @@ STAGE PLANS:
 0 _col0 (type: int)
 1 _col1 (type: int)
 Map Join Vectorization:
-bigTableKeyColumnNums: [0]
-bigTableRetainedColumnNums: [0, 1]
-bigTableValueColumnNums: [1]
+bigTableKeyColumns: 0:int
+bigTableRetainColumnNums: [1]
+bigTableValueColumns: 1:string
 className: VectorMapJoinInnerLongOperator
 native: true
 nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-projectedOutputColumnNums: [1, 3, 0]
-smallTableMapping: [3]
+nonOuterSmallTableKeyMapping: [0]
+projectedOutput: 1:string, 3:string, 0:int
+smallTableValueMapping: 3:string
+hashTableImplementationType: OPTIMIZED
 outputColumnNames: _col1, _col2, _col3
 input vertices:
 1 Map 2
@@ -1201,10 +1213,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col1 (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkLongOperator
-keyColumnNums: [1]
+keyColumns: 1:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [0]
+valueColumns: 0:string
 Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: string)
 Execution mode: vectorized, llap
@@ -1293,10 +1305,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col1 (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkLongOperator
-keyColumnNums: [1]
+keyColumns: 1:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [0]
+valueColumns: 0:string
 Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: string)
 Execution mode: vectorized, llap
@@ -1346,14 +1358,16 @@ STAGE PLANS:
 0 _col1 (type: int)
 1 _col0 (type: int)
 Map Join Vectorization:
-bigTableKeyColumnNums: [0]
-bigTableRetainedColumnNums: [0, 1]
-bigTableValueColumnNums: [0, 1]
+bigTableKeyColumns: 0:int
+bigTableRetainColumnNums: [0, 1]
+bigTableValueColumns: 0:int, 1:string
 className: VectorMapJoinInnerLongOperator
 native: true
 nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-projectedOutputColumnNums: [3, 0, 1]
-smallTableMapping: [3]
+nonOuterSmallTableKeyMapping: []
+projectedOutput: 3:string, 0:int, 1:string
+smallTableValueMapping: 3:string
+hashTableImplementationType: OPTIMIZED
 outputColumnNames: _col0, _col2, _col3
 input vertices:
 0 Map 1
@@ -1462,10 +1476,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col1 (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkLongOperator
-keyColumnNums: [1]
+keyColumns: 1:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [0]
+valueColumns: 0:string
 Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: string)
 Execution mode: vectorized, llap
@@ -1515,14 +1529,16 @@ STAGE PLANS:
 0 _col1 (type: int)
 1 _col0 (type: int)
 Map Join Vectorization:
-bigTableKeyColumnNums: [0]
-bigTableRetainedColumnNums: [0, 1]
-bigTableValueColumnNums: [1]
+bigTableKeyColumns: 0:int
+bigTableRetainColumnNums: [1]
+bigTableValueColumns: 1:string
 className: VectorMapJoinInnerLongOperator
 native: true
 nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-projectedOutputColumnNums: [3, 0, 1]
-smallTableMapping: [3]
+nonOuterSmallTableKeyMapping: [0]
+projectedOutput: 3:string, 0:int, 1:string
+smallTableValueMapping: 3:string
+hashTableImplementationType: OPTIMIZED
 outputColumnNames: _col0, _col1, _col3
 input vertices:
 0 Map 1
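The vector_inner_join hunks above restructure the Map Join annotation: bigTableValueColumns lists what is carried through from the big table, smallTableValueMapping shows small-table values landing in scratch columns (3:string), and nonOuterSmallTableKeyMapping records that for a non-outer join the small table's key can be reused directly from the matched big-table key column instead of being copied. Below is a sketch of the projected-output bookkeeping this implies; the ordering and names are illustrative assumptions, not Hive's exact logic.

    import java.util.ArrayList;
    import java.util.List;

    // Hypothetical sketch: assemble the "projectedOutput" column list from
    // small-table value mappings, reused key columns, and big-table values.
    public class MapJoinProjection {
        public static List<String> projectedOutput(
                int[] smallTableValueMapping,
                int[] nonOuterSmallTableKeyMapping,
                int[] bigTableValueColumns,
                List<String> columnTypes) {
            List<String> projected = new ArrayList<>();
            for (int col : smallTableValueMapping) {
                projected.add(col + ":" + columnTypes.get(col));
            }
            for (int col : nonOuterSmallTableKeyMapping) {
                projected.add(col + ":" + columnTypes.get(col));
            }
            for (int col : bigTableValueColumns) {
                projected.add(col + ":" + columnTypes.get(col));
            }
            return projected;
        }

        public static void main(String[] args) {
            // Mirrors "projectedOutput: 3:string, 0:int" from the first
            // vector_inner_join hunk: scratch column 3 holds the small
            // table's value and column 0 is the reused join key.
            List<String> types = List.of("int", "string", "string", "string");
            System.out.println(projectedOutput(
                new int[] {3}, new int[] {0}, new int[] {}, types));
        }
    }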
ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out @@ -229,6 +229,7 @@ STAGE PLANS: className: VectorMapJoinInnerBigOnlyMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 diff --git ql/src/test/results/clientpositive/llap/vector_join30.q.out ql/src/test/results/clientpositive/llap/vector_join30.q.out index 5fb8258..57c045c 100644 --- ql/src/test/results/clientpositive/llap/vector_join30.q.out +++ ql/src/test/results/clientpositive/llap/vector_join30.q.out @@ -10,7 +10,7 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@orcsrc POSTHOOK: Lineage: orcsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -18,7 +18,7 @@ JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -50,6 +50,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -68,9 +69,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE @@ -85,6 +88,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -92,6 +101,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -110,13 +120,24 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 + keyContext: [types [string], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [0:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] keys: 0 _col0 (type: string) 1 _col0 (type: string) Map Join 
Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:string className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:string, 1:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col2, _col3 input vertices: 0 Map 1 @@ -135,10 +156,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + output value column names: VALUE._col0 Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -152,14 +175,27 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -190,26 +226,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -select sum(hash(Y.key,Y.value)) -PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc -#### A masked pattern was here #### -POSTHOOK: query: FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc -#### A masked pattern was here #### -103231310608 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -217,7 +234,7 @@ LEFT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -249,6 +266,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -260,13 +278,25 @@ STAGE PLANS: Map Join Operator condition 
map: Left Outer Join 0 to 1 + keyContext: [types [string], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [[1, 0], null] + valueContexts: [1:[types [string], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] keys: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [] className: VectorMapJoinOuterStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 0 -> 3 + projectedOutput: 3:string, 4:string + smallTableValueMapping: 4:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col2, _col3 input vertices: 1 Map 3 @@ -285,10 +315,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + output value column names: VALUE._col0 Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -302,6 +334,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, string, bigint] Map 3 Map Operator Tree: TableScan @@ -309,6 +347,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -320,11 +359,15 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -338,14 +381,27 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce 
Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -376,26 +432,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -LEFT OUTER JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -select sum(hash(Y.key,Y.value)) -PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc -#### A masked pattern was here #### -POSTHOOK: query: FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -LEFT OUTER JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc -#### A masked pattern was here #### -103231310608 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ -403,7 +440,7 @@ RIGHT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ -435,6 +472,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -446,9 +484,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -463,6 +503,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -470,6 +516,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -481,13 +528,24 @@ STAGE PLANS: Map Join Operator condition map: Right Outer Join 0 to 1 + keyContext: [types [string], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [null, [0, 0]] + valueContexts: [0:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] keys: 0 _col0 
(type: string) 1 _col0 (type: string) Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:string className: VectorMapJoinOuterStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:string, 1:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col2, _col3 input vertices: 0 Map 1 @@ -506,10 +564,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + output value column names: VALUE._col0 Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -523,14 +583,27 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -561,26 +634,204 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM +PREHOOK: query: explain vectorization detail debug +FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc -#### A masked pattern was here #### -POSTHOOK: query: FROM +POSTHOOK: query: explain vectorization detail debug +FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### -103231310608 -PREHOOK: query: explain vectorization expression + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + 
native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + outer filter mappings: [[1, 0], [0, 0]] + keyExpressions: + 0 [Column[_col0]] + 1 
[Column[_col0]] + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 96342 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -591,7 +842,7 @@ JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -626,6 +877,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -644,9 +896,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE @@ -661,6 +915,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: 
key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -668,6 +928,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -687,6 +948,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 Inner Join 0 to 2 + keyContext: [types [string], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [0:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false], 2:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + 2 [Column[_col0]] keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -717,10 +984,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + output value column names: VALUE._col0 Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -734,6 +1003,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -741,6 +1016,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -759,9 +1035,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE @@ -776,14 +1054,27 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + 
partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -814,32 +1105,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc -#### A masked pattern was here #### -POSTHOOK: query: FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc -#### A masked pattern was here #### -348019368476 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -850,7 +1116,7 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -885,6 +1151,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -896,9 +1163,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -913,6 +1182,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -920,6 +1195,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -931,11 +1207,15 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string Statistics: Num rows: 500 Data size: 
175168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -949,6 +1229,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan @@ -956,6 +1242,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -967,9 +1254,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -984,6 +1273,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -991,6 +1286,10 @@ STAGE PLANS: condition map: Inner Join 0 to 1 Left Outer Join 0 to 2 + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + 2 [Column[_col0]] keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1004,16 +1303,27 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + output value column names: VALUE._col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1044,32 +1354,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -LEFT OUTER JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc -#### A masked pattern was here #### -POSTHOOK: query: FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -LEFT OUTER JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc 
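All of the multi-way variants re-baselined in this file share one query shape; reassembled from the PREHOOK text of the join-then-left-outer case above, it reads:

explain vectorization detail debug
FROM
(SELECT orcsrc.* FROM orcsrc sort by key) x
JOIN
(SELECT orcsrc.* FROM orcsrc sort by value) Y
ON (x.key = Y.key)
LEFT OUTER JOIN
(SELECT orcsrc.* FROM orcsrc sort by value) Z
ON (x.key = Z.key)
select sum(hash(Y.key,Y.value))

Only the join keywords vary between variants; each plan performs the three-way join in Reducer 2 as a Merge Join Operator, which the new baselines annotate with 'enableConditionsNotMet: Vectorizing MergeJoin Supported IS false'.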
-#### A masked pattern was here #### -348019368476 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -1080,7 +1365,7 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -1115,6 +1400,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -1126,9 +1412,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -1143,6 +1431,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -1150,6 +1444,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -1161,11 +1456,15 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -1179,6 +1478,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan @@ -1186,6 +1491,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -1197,9 +1503,11 @@ STAGE PLANS: Reduce Output Operator key expressions: 
_col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -1214,6 +1522,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1221,6 +1535,10 @@ STAGE PLANS: condition map: Left Outer Join 0 to 1 Left Outer Join 0 to 2 + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + 2 [Column[_col0]] keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1234,16 +1552,27 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + output value column names: VALUE._col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1274,32 +1603,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -LEFT OUTER JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -LEFT OUTER JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc -#### A masked pattern was here #### -POSTHOOK: query: FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -LEFT OUTER JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -LEFT OUTER JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc -#### A masked pattern was here #### -348019368476 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -1310,7 +1614,7 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -1345,6 +1649,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key 
(type: string) outputColumnNames: _col0 @@ -1356,9 +1661,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -1373,6 +1680,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -1380,6 +1693,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -1391,11 +1705,15 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -1409,6 +1727,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan @@ -1416,6 +1740,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -1427,9 +1752,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -1444,6 +1771,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, 
value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1451,6 +1784,10 @@ STAGE PLANS: condition map: Left Outer Join 0 to 1 Right Outer Join 0 to 2 + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + 2 [Column[_col0]] keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1464,16 +1801,27 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + output value column names: VALUE._col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1504,32 +1852,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -LEFT OUTER JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -RIGHT OUTER JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc -#### A masked pattern was here #### -POSTHOOK: query: FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -LEFT OUTER JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -RIGHT OUTER JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc -#### A masked pattern was here #### -348019368476 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ -1540,7 +1863,7 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ -1575,6 +1898,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -1586,9 +1910,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -1603,6 +1929,12 @@ STAGE PLANS: allNative: true 
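The hunks continuing below re-baseline the last pre-existing three-way variant, whose query (from the PREHOOK text above) is:

explain vectorization detail debug
FROM
(SELECT orcsrc.* FROM orcsrc sort by key) x
RIGHT OUTER JOIN
(SELECT orcsrc.* FROM orcsrc sort by value) Y
ON (x.key = Y.key)
RIGHT OUTER JOIN
(SELECT orcsrc.* FROM orcsrc sort by value) Z
ON (x.key = Z.key)
select sum(hash(Y.key,Y.value))

Like its siblings it ends in a non-vectorized Reducer 2 Merge Join Operator; the wholly added blocks after it then extend the suite with JOIN + FULL OUTER JOIN and FULL OUTER JOIN + FULL OUTER JOIN variants of the same query.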
usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -1610,6 +1942,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -1621,11 +1954,15 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -1639,6 +1976,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan @@ -1646,6 +1989,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -1657,9 +2001,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -1674,6 +2020,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1681,6 +2033,10 @@ STAGE PLANS: condition map: Right Outer Join 0 to 1 Right Outer Join 0 to 2 + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + 2 [Column[_col0]] keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1694,16 +2050,27 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + output value column names: VALUE._col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin 
Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1734,28 +2101,1497 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM +PREHOOK: query: explain vectorization detail debug +FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -RIGHT OUTER JOIN +JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc -#### A masked pattern was here #### -POSTHOOK: query: FROM +POSTHOOK: query: explain vectorization detail debug +FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -RIGHT OUTER JOIN +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +FULL OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + 
TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + 
Merge Join Operator + condition map: + Inner Join 0 to 1 + Full Outer Join 0 to 2 + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + 2 [Column[_col0]] + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 192684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization detail debug +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +FULL OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +FULL OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail debug +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +FULL OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +FULL OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: 
[0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 
2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + Full Outer Join 0 to 2 + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + 2 [Column[_col0]] + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 192684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization detail debug +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +FULL OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail debug +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +FULL OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE 
+ Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + Left Outer Join 0 to 2 + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + 2 [Column[_col0]] + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 192684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization detail debug +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +FULL OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail debug +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +FULL OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + 
className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: 
VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + Full Outer Join 0 to 2 + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + 2 [Column[_col0]] + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 192684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization detail debug +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +FULL OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: 
QUERY +POSTHOOK: query: explain vectorization detail debug +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +FULL OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + Right Outer Join 0 to 2 + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + 2 [Column[_col0]] + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 192684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN 
[tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization detail debug +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +RIGHT OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) +FULL OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail debug +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +FULL OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### -348019368476 + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all 
inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + 
enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + Full Outer Join 0 to 2 + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + 2 [Column[_col0]] + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 192684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/llap/vector_join_filters.q.out ql/src/test/results/clientpositive/llap/vector_join_filters.q.out index 4e5205f..5c6b004 100644 --- ql/src/test/results/clientpositive/llap/vector_join_filters.q.out +++ ql/src/test/results/clientpositive/llap/vector_join_filters.q.out @@ -47,15 +47,173 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4937935 Warning: Map Join MAPJOIN[16][bigTable=?] 
in task 'Map 2' is a cross product -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 -#### A masked pattern was here #### -3080335 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: ((key = value) and (key > 40) and (value > 50)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + 
condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + keys: + 0 + 1 + Map Join Vectorization: + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: Outer Join has keys IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 4 Data size: 68 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 4:int + Statistics: Num rows: 4 Data size: 68 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = 
b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -128,42 +286,677 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4937935 -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 -#### A masked pattern was here #### -3080335 -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: ((key = value) and (key > 40) and (value > 50)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 -#### A masked pattern was here #### -3080335 -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: ((key = value) and (key > 40) and (value > 50)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false 
+ vectorized: true + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 -#### A masked pattern was here #### -3080335 -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: ((key = value) and (key > 40) and (value > 50)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + TableScan + 
alias: b + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 -#### A masked pattern was here #### -3080335 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: ((key = value) and (key > 40) and (value > 50)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + 
className: VectorSelectOperator + native: true + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: 
query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out index 056360f..f23fef7 100644 --- ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out +++ ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out @@ -47,15 +47,167 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 13630578 Warning: Map Join MAPJOIN[14][bigTable=?] in task 'Map 2' is a cross product -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 -#### A masked pattern was here #### -13630578 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value 
(type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {true} + keys: + 0 + 1 + Map Join Vectorization: + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: Outer Join has keys IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 9 Data size: 153 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 4:int + Statistics: Num rows: 9 Data size: 153 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT 
sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -128,42 +280,643 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4542003 -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 -#### A masked pattern was here #### -3079923 -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + Map Join Vectorization: + className: 
VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: query: EXPLAIN VECTORIZATION 
OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 -#### A masked pattern was here #### -4509891 -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int + Statistics: Num rows: 3 Data size: 26 Basic 
stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 -#### A masked pattern was here #### -3113558 -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + 
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value 
expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR + +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR + +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 -#### A masked pattern was here #### -3079923 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + 
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + 
native: false + vectorProcessingMode: GLOBAL + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value) PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out index 821ea3a..1775c1d 100644 --- ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out +++ ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out @@ -65,6 +65,7 @@ POSTHOOK: Output: default@tjoin1 POSTHOOK: Lineage: tjoin1.c1 SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: tjoin1.c2 EXPRESSION [(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ] POSTHOOK: Lineage: tjoin1.rnum SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ] +_col0 _col1 _col2 PREHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE PREHOOK: type: QUERY PREHOOK: Input: default@tjoin2stage @@ -76,12 +77,14 @@ POSTHOOK: Output: default@tjoin2 POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] -PREHOOK: query: explain vectorization expression +tjoin2stage.rnum tjoin2stage.c1 tjoin2stage.c2 +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] @@ -167,15 +170,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 
) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] @@ -261,15 +266,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -293,6 +300,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -350,6 +358,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [string] Map 2 Map Operator Tree: TableScan @@ -357,6 +371,7 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 @@ -371,8 +386,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator + keyColumns: 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap @@ -386,6 +403,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -403,15 +426,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer 
join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -435,6 +460,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -492,6 +518,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [string] Map 2 Map Operator Tree: TableScan @@ -499,6 +531,7 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 @@ -513,8 +546,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator + keyColumns: 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap @@ -528,6 +563,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -545,15 +586,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -577,6 +620,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -595,9 +639,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableFilterExpressions: FilterLongColGreaterLongScalar(col 2:int, val 15) + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1, 2] + 
bigTableValueColumns: 0:int, 1:int, 2:int className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:int, 1:int, 2:int, 4:char(2) + smallTableValueMapping: 4:char(2) + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 @@ -631,6 +682,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [string] Map 2 Map Operator Tree: TableScan @@ -638,6 +695,7 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 @@ -652,8 +710,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator + keyColumns: 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap @@ -667,6 +727,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -684,15 +750,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -716,6 +784,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -734,9 +803,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableFilterExpressions: FilterLongColGreaterLongScalar(col 2:int, val 15) 
+ bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:int, 1:int, 2:int className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:int, 1:int, 2:int, 4:char(2) + smallTableValueMapping: 4:char(2) + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 @@ -770,6 +846,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [string] Map 2 Map Operator Tree: TableScan @@ -777,6 +859,7 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 @@ -791,8 +874,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator + keyColumns: 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap @@ -806,6 +891,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -823,6 +914,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL diff --git ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out index 17704e5..983dc9e 100644 --- ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out @@ -128,18 +128,98 @@ POSTHOOK: query: select * from t4 POSTHOOK: type: QUERY POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain vectorization only summary - +PREHOOK: query: explain vectorization expression select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary - +POSTHOOK: query: explain vectorization expression select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: 
Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -156,16 +236,98 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain vectorization only summary +PREHOOK: query: explain vectorization expression select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary +POSTHOOK: query: explain vectorization expression select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + 
alias: a + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -184,16 +346,98 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain vectorization only summary +PREHOOK: query: explain vectorization expression select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary +POSTHOOK: query: explain vectorization expression select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE 
Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -204,24 +448,110 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain vectorization only summary +PREHOOK: query: explain vectorization expression select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary +POSTHOOK: query: explain vectorization expression select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -PREHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t3 -#### A masked pattern was here #### -POSTHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + 
Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 15) (type: boolean) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t3 #### A masked pattern was here #### val_0 @@ -235,16 +565,98 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain vectorization only summary +PREHOOK: query: explain vectorization expression select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary +POSTHOOK: query: explain vectorization expression select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] +STAGE DEPENDENCIES: + Stage-1 is a root 
stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((value < 'val_10') and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -258,96 +670,960 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain vectorization only summary +PREHOOK: query: explain vectorization expression select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary +POSTHOOK: query: explain vectorization expression select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -PREHOOK: query: select a.value from t1 a left semi join (select key from t3 where key 
> 5) b on a.key = b.key sort by a.value -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t3 -#### A masked pattern was here #### -POSTHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t3 -#### A masked pattern was here #### -val_10 -val_8 -val_9 -PREHOOK: query: explain vectorization only summary -select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary -select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] - -PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 -#### A masked pattern was here #### -POSTHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 -#### A masked pattern was here #### -PREHOOK: query: explain vectorization only summary -select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary -select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] - -PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 -#### A masked pattern was here #### -POSTHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 -#### A masked pattern was here #### -10 val_5 -10 val_5 -10 val_5 -4 val_2 -8 val_4 -PREHOOK: query: explain vectorization only summary -select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary -select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 -PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t3 +STAGE PLANS: + Stage: Stage-1 + Tez #### A masked pattern was here #### -POSTHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: 
default@t3 + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### -0 -0 + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t3 + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 5) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +val_10 +val_8 +val_9 +PREHOOK: query: explain vectorization expression +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + 
enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +PREHOOK: query: explain vectorization expression +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = 
b.key sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 2) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +10 val_5 +10 val_5 +10 val_5 +4 val_2 +8 val_4 +PREHOOK: query: explain vectorization 
expression +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +2 +4 +4 +5 +5 +5 +8 +8 +9 +PREHOOK: query: explain vectorization expression +select * from t1 a left semi join t2 b on a.key = 2*b.key 
sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 (2 * _col0) (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (2 * key) is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (2 * _col0) (type: int) + sort order: + + Map-reduce partition columns: (2 * _col0) (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +8 val_8 +PREHOOK: query: explain vectorization expression +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by 
a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 3 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: string) + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), 
KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +10 val_10 10 val_5 +10 val_10 10 val_5 +10 val_10 10 val_5 +4 val_4 4 val_2 +8 val_8 8 val_4 +PREHOOK: query: explain vectorization expression +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int), value (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: 
string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +10 val_10 +2 val_2 +4 val_4 +5 val_5 +5 val_5 +5 val_5 +8 val_8 +9 val_9 +PREHOOK: query: explain vectorization expression +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 3 + 
Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +0 +0 0 0 0 @@ -356,126 +1632,488 @@ POSTHOOK: Input: default@t3 10 10 10 -2 4 4 -5 -5 -5 8 8 -9 -PREHOOK: query: explain vectorization only summary -select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: explain vectorization expression +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary -select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization expression +select 
a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez #### A masked pattern was here #### -POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -8 val_8 -PREHOOK: query: explain vectorization only summary -select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary -select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 3 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 
44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -10 val_10 10 val_5 -10 val_10 10 val_5 -10 val_10 10 val_5 -4 val_4 4 val_2 -8 val_8 8 val_4 -PREHOOK: query: explain vectorization only summary -select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +PREHOOK: query: explain vectorization expression +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary -select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization expression +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 22 Data 
size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -10 val_10 -2 val_2 -4 val_4 -5 val_5 -5 val_5 -5 val_5 -8 val_8 -9 val_9 -PREHOOK: query: explain vectorization only summary -select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 
b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +NULL +NULL +NULL +PREHOOK: query: explain vectorization expression +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary -select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization expression +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 6 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Execution mode: llap + LLAP IO: all inputs + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 7 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort 
order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: key + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 6 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -487,6 +2125,18 @@ POSTHOOK: Input: default@t3 0 0 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 10 10 10 @@ -495,23 +2145,122 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization only summary -select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +NULL +NULL +NULL +PREHOOK: query: explain vectorization expression +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary -select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization expression +select a.key from t1 a full outer join t3 b on a.key = 
b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + Left Semi Join 1 to 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: 
select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -543,23 +2292,130 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization only summary -select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +NULL +NULL +NULL +PREHOOK: query: explain vectorization expression +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary -select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization expression +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num 
rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -587,30 +2443,141 @@ POSTHOOK: Input: default@t3 10 10 10 +16 +18 +20 4 4 8 8 -NULL -NULL -NULL -PREHOOK: query: explain vectorization only summary -select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization expression +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary -select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization expression +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] -PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + 
Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -638,76 +2605,171 @@ POSTHOOK: Input: default@t3 10 10 10 -16 -18 -20 -4 -4 -8 -8 -PREHOOK: query: explain vectorization only summary -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by 
a.key -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] - -PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 -PREHOOK: Input: default@t3 -#### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 -POSTHOOK: Input: default@t3 -#### A masked pattern was here #### -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 10 10 10 10 +10 +10 +10 +10 +2 4 4 +5 +5 +5 8 8 +9 NULL NULL NULL -NULL -NULL -PREHOOK: query: explain vectorization only summary +PREHOOK: query: explain vectorization expression select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary +POSTHOOK: query: explain vectorization expression select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 6 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort 
order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 6 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 6 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -762,16 +2824,120 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain vectorization only summary +PREHOOK: query: explain vectorization expression select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary +POSTHOOK: query: explain vectorization expression select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 3948 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, 
_col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 23 Data size: 4342 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -819,16 +2985,90 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization only summary +PREHOOK: query: explain vectorization expression select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary +POSTHOOK: query: explain vectorization expression select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + 
Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 100) and value is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 2 + Statistics: Num rows: 12 Data size: 2226 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2226 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: all inputs + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY PREHOOK: Input: default@t2 @@ -839,10 +3079,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -880,6 +3120,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ @@ -947,10 +3188,10 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -988,6 +3229,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num 
rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ @@ -1057,10 +3299,10 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -1098,6 +3340,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ @@ -1159,10 +3402,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -1200,6 +3443,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 @@ -1276,10 +3520,10 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -1317,6 +3561,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ @@ -1381,10 +3626,10 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -1446,6 +3691,7 @@ STAGE PLANS: input vertices: 1 Map 1 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 @@ -1490,10 +3736,10 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select a.value from t1 a left semi join (select key , 
value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -1555,6 +3801,7 @@ STAGE PLANS: input vertices: 1 Map 1 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 @@ -1596,10 +3843,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -1661,6 +3908,7 @@ STAGE PLANS: input vertices: 1 Map 1 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ @@ -1703,10 +3951,10 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -1744,6 +3992,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -1824,10 +4073,10 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -1865,6 +4114,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ @@ -1930,10 +4180,10 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN 
VECTORIZATION: @@ -1964,25 +4214,34 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Left Semi Join 1 to 2 keys: 0 key (type: int) 1 key (type: int) - 2 _col0 (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 3 - 2 Map 4 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: string) Execution mode: llap LLAP IO: all inputs Map 3 @@ -2031,10 +4290,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2072,10 +4331,10 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -2113,6 +4372,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ @@ -2188,10 +4448,10 @@ POSTHOOK: Input: default@t3 5 val_5 8 val_8 9 val_9 -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select /*+ mapjoin(b, c) */ 
a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -2222,20 +4482,29 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 - Left Semi Join 0 to 2 keys: 0 key (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) outputColumnNames: _col0 input vertices: 1 Map 3 - 2 Map 4 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Map 3 @@ -2292,10 +4561,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2333,10 +4602,10 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -2364,20 +4633,32 @@ STAGE PLANS: Map Join Operator condition map: Left Outer Join 0 to 1 - Left Semi Join 1 to 2 keys: 0 key (type: int) 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 + outputColumnNames: _col0, _col5 input vertices: 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + 
outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Map 3 @@ -2397,20 +4678,23 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -2419,10 +4703,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2472,10 +4756,10 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -2491,7 +4775,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -2524,20 +4808,23 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: 
NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -2545,28 +4832,40 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 - Left Semi Join 1 to 2 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col5 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2619,11 +4918,11 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain vectorization summary -select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization operator +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary -select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization operator +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false @@ -2638,86 +4937,140 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) 
+ Reducer 5 <- Map 1 (SIMPLE_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 6 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Execution mode: llap + LLAP IO: all inputs + Map 6 + Map Operator Tree: + TableScan + alias: b Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs - Map 4 + Map 7 Map Operator Tree: TableScan - alias: b + alias: c Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: all inputs - Map 5 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs - Reducer 2 + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: Left Semi Join 0 to 1 - Left Outer Join 0 to 2 keys: - 0 key (type: int) + 0 _col5 (type: int) 1 _col0 (type: int) - 2 key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 
105 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: key + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 6 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Union 2 + Vertex: Union 2 Stage: Stage-0 Fetch Operator @@ -2725,13 +5078,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -2759,18 +5112,18 @@ POSTHOOK: Input: default@t3 10 10 10 -16 -18 -20 4 4 8 8 -PREHOOK: query: explain vectorization summary -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +NULL +NULL +NULL +PREHOOK: query: explain vectorization operator +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization operator +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false @@ -2785,8 
+5138,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2794,35 +5147,62 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs - Map 4 + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: c @@ -2837,30 +5217,13 @@ STAGE PLANS: Reducer 2 Execution mode: llap Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Right Outer Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE 
Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2872,13 +5235,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -2906,19 +5269,17 @@ POSTHOOK: Input: default@t3 10 10 10 +16 +18 +20 4 4 8 8 -NULL -NULL -NULL -NULL -NULL -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -2934,7 +5295,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -2943,11 +5305,25 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Map 4 @@ -2955,20 +5331,23 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: 
key (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Map 5 @@ -2988,28 +5367,26 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 - Outer Join 0 to 2 + Full Outer Join 0 to 1 keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) + 0 _col0 (type: int) + 1 key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3075,10 +5452,158 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: 
NONE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 6 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 6 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 6 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain 
vectorization operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -3116,6 +5641,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 23 Data size: 4342 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Map Join Operator condition map: Left Outer Join 0 to 1 @@ -3126,6 +5652,7 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -3236,10 +5763,10 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization summary +PREHOOK: query: explain vectorization operator select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization summary +POSTHOOK: query: explain vectorization operator select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -3280,6 +5807,7 @@ STAGE PLANS: input vertices: 1 Map 2 Statistics: Num rows: 12 Data size: 2226 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true File Output Operator compressed: false Statistics: Num rows: 12 Data size: 2226 Basic stats: COMPLETE Column stats: NONE @@ -3359,6 +5887,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -3472,6 +6001,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -3587,6 +6117,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -3694,6 +6225,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -3815,6 +6347,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and 
Supports Key Types IS true @@ -3955,6 +6488,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -4068,6 +6602,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -4178,6 +6713,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -4260,6 +6796,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -4386,6 +6923,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -4497,17 +7035,24 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false - Select Vectorization: - className: VectorSelectOperator - native: true - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Map Join Vectorization: + bigTableKeyExpressions: col 2:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + 
nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4645,6 +7190,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -4766,14 +7312,21 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4916,14 +7469,24 @@ STAGE PLANS: TableScan Vectorization: native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false - nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4958,18 +7521,205 @@ STAGE PLANS: Map Operator Tree: TableScan Vectorization: native: true - Select Vectorization: - className: VectorSelectOperator + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + + 
Stage: Stage-0 + Fetch Operator + +PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +PREHOOK: query: explain vectorization operator +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4982,6 +7732,37 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -4990,23 +7771,37 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true + Statistics: Num rows: 26 Data size: 105 Basic 
stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator + limit: -1 + Processor Tree: + ListSink -PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -5038,10 +7833,13 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization only operator +NULL +NULL +NULL +PREHOOK: query: explain vectorization operator select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only operator +POSTHOOK: query: explain vectorization operator select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -5055,18 +7853,60 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 6 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column 
stats: NONE + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -5075,17 +7915,33 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 4 + Map 6 Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -5097,22 +7953,45 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 5 + Map 7 Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -5124,8 +8003,25 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Reducer 2 Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -5134,57 +8030,80 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - - Stage: Stage-0 - Fetch Operator - -PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 -PREHOOK: Input: default@t3 -#### A masked pattern was here #### -POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 -POSTHOOK: Input: default@t3 -#### A masked pattern was here #### -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -10 -10 -10 -10 -4 -4 -8 -8 -NULL -NULL -NULL + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 6 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: explain vectorization only operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY @@ -5203,17 +8122,32 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) Vertices: Map 1 Map Operator Tree: TableScan Vectorization: native: true - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS 
true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -5222,25 +8156,28 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 4 + Map 3 Map Operator Tree: TableScan Vectorization: native: true - Select Vectorization: - className: VectorSelectOperator + Filter Vectorization: + className: VectorFilterOperator native: true - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -5252,7 +8189,7 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 5 + Map 4 Map Operator Tree: TableScan Vectorization: native: true @@ -5272,7 +8209,6 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -5333,10 +8269,10 @@ POSTHOOK: Input: default@t3 8 8 PREHOOK: query: explain vectorization only operator -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY POSTHOOK: query: explain vectorization only operator -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN 
VECTORIZATION: enabled: true @@ -5350,17 +8286,27 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Vertices: Map 1 Map Operator Tree: TableScan Vectorization: native: true - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -5369,25 +8315,28 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true Map 4 Map Operator Tree: TableScan Vectorization: native: true - Select Vectorization: - className: VectorSelectOperator + Filter Vectorization: + className: VectorFilterOperator native: true - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -5419,6 +8368,9 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -5438,13 +8390,13 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator -PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer 
join t1 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -5472,12 +8424,23 @@ POSTHOOK: Input: default@t3 10 10 10 +10 +10 +10 +10 +10 +10 +10 +10 +2 4 4 +5 +5 +5 8 8 -NULL -NULL +9 NULL NULL NULL @@ -5499,17 +8462,37 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) Vertices: Map 1 Map Operator Tree: TableScan Vectorization: native: true - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -5518,25 +8501,28 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true 
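Two things in the hunks above are worth a reader's note. First, every map join here reports "nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false", so the row-mode VectorMapJoinOperator is used rather than the native vectorized map join. A minimal q-file sketch for exercising the native path against the same tables — an illustration, not part of this patch — would be:

-- hypothetical follow-up, not part of this patch:
-- enable the native vectorized MapJoin and re-check which conditions are met
set hive.vectorized.execution.mapjoin.native.enabled=true;
explain vectorization operator
select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key;

Second, the new FULL OUTER MapJoin plans feed Union 2 from two producers (Map 1 and Reducer 5 are both marked CONTAINS, with Reducer 5 carrying the FullOuterIntersect: true join). This mirrors — as a sketch of the semantics, not the literal operator graph — the usual decomposition of a full outer join into a left-preserving pass plus a pass that recovers unmatched right-side rows:

-- sketch of the semantics behind the two Union 2 producers;
-- the second branch only ever emits NULL for a.key, matching the +NULL result rows
select a.key from t1 a left outer join t3 b on a.key = b.key
union all
select a.key from t1 a right outer join t3 b on a.key = b.key where a.key is null;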
- Map 4 + Map 5 Map Operator Tree: TableScan Vectorization: native: true - Select Vectorization: - className: VectorSelectOperator + Filter Vectorization: + className: VectorFilterOperator native: true - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -5548,7 +8534,7 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 5 + Map 6 Map Operator Tree: TableScan Vectorization: native: true @@ -5556,6 +8542,10 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -5567,7 +8557,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Reducer 2 Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -5583,64 +8572,33 @@ STAGE PLANS: File Sink Vectorization: className: VectorFileSinkOperator native: false + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Union 2 Stage: Stage-0 Fetch Operator -PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 -PREHOOK: Input: default@t3 -#### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 -POSTHOOK: Input: default@t3 -#### A masked pattern was here #### -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -2 -4 -4 -5 -5 -5 -8 -8 -9 -NULL -NULL -NULL PREHOOK: query: explain vectorization only operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY @@ -5670,11 +8628,13 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false Map Join Vectorization: + bigTableKeyExpressions: col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true @@ -5840,6 +8800,7 @@ STAGE PLANS: className: VectorSelectOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -5902,10 +8863,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -5932,12 +8893,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -5948,7 +8907,6 @@ STAGE PLANS: 1 _col0 (type: int) Map Join Vectorization: 
bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -5957,15 +8915,14 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -5978,12 +8935,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -5991,12 +8942,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -6005,16 +8954,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -6025,10 +8971,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -6041,27 +8985,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false 
usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -6069,7 +9000,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -6104,10 +9034,10 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -6134,12 +9064,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -6150,7 +9078,6 @@ STAGE PLANS: 1 _col0 (type: int) Map Join Vectorization: bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -6159,15 +9086,14 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -6180,12 +9106,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -6193,12 +9113,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 
Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -6207,16 +9125,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -6227,10 +9142,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -6243,27 +9156,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -6271,7 +9171,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -6308,10 +9207,10 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -6338,12 +9237,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -6354,7 +9251,6 @@ STAGE PLANS: 1 _col0 (type: int) Map Join Vectorization: bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, 
spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -6363,15 +9259,14 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -6384,12 +9279,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -6397,12 +9286,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -6411,16 +9298,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -6431,10 +9315,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -6447,27 +9329,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select 
Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -6475,7 +9344,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -6504,10 +9372,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -6534,12 +9402,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -6550,7 +9416,6 @@ STAGE PLANS: 1 _col1 (type: int) Map Join Vectorization: bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -6559,23 +9424,21 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -6588,12 +9451,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -6601,12 +9458,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: 
FilterLongColLessLongScalar(col 0:int, val 15) predicate: (key < 15) (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -6615,16 +9470,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int, col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col1 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -6635,10 +9487,8 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -6651,27 +9501,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -6679,7 +9516,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -6719,10 +9555,10 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -6749,12 +9585,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -6765,7 +9599,6 @@ STAGE PLANS: 1 _col0 (type: int) Map Join Vectorization: bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: 
false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -6774,15 +9607,14 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -6795,12 +9627,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -6808,12 +9634,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterStringGroupColLessStringScalar(col 1:string, val val_10), SelectColumnIsNotNull(col 0:int)) predicate: ((value < 'val_10') and key is not null) (type: boolean) Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -6822,16 +9646,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int, col 1:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -6842,10 +9663,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -6858,27 +9677,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - 
reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -6886,7 +9692,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -6918,10 +9723,10 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -6948,12 +9753,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 5) predicate: (key > 5) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -6962,16 +9765,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -6982,10 +9782,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -6998,12 +9796,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -7011,12 +9803,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ 
-7027,7 +9817,6 @@ STAGE PLANS: 1 _col0 (type: int) Map Join Vectorization: bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -7036,23 +9825,21 @@ STAGE PLANS: input vertices: 1 Map 1 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -7065,27 +9852,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -7093,7 +9867,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -7125,10 +9898,10 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -7155,12 +9928,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0:int, val 5), 
FilterStringGroupColLessEqualStringScalar(col 1:string, val val_20)) predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -7169,16 +9940,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int, col 1:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -7189,10 +9957,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -7205,12 +9971,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -7218,12 +9978,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -7234,7 +9992,6 @@ STAGE PLANS: 1 _col0 (type: int) Map Join Vectorization: bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -7243,23 +10000,21 @@ STAGE PLANS: input vertices: 1 Map 1 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: 
NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -7272,27 +10027,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -7300,7 +10042,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -7329,10 +10070,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -7359,12 +10100,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 2) predicate: (key > 2) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -7373,16 +10112,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -7393,10 +10129,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -7409,12 +10143,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - 
scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -7422,12 +10150,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -7438,7 +10164,6 @@ STAGE PLANS: 1 _col0 (type: int) Map Join Vectorization: bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -7447,15 +10172,14 @@ STAGE PLANS: input vertices: 1 Map 1 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -7468,27 +10192,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -7496,7 +10207,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -7530,10 +10240,10 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -7560,12 +10270,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE 
Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -7576,7 +10284,6 @@ STAGE PLANS: 1 _col0 (type: int) Map Join Vectorization: bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -7585,15 +10292,14 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int) sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -7606,12 +10312,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -7619,12 +10319,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -7633,16 +10331,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -7653,10 +10348,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -7669,27 +10362,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: 
false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -7697,7 +10377,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -7745,10 +10424,10 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -7775,12 +10454,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -7791,7 +10468,6 @@ STAGE PLANS: 1 (2 * _col0) (type: int) Map Join Vectorization: bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -7800,15 +10476,14 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -7821,12 +10496,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -7834,12 +10503,10 @@ STAGE PLANS: Statistics: Num rows: 
11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 3:int)(children: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 3:int) predicate: (2 * key) is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -7848,16 +10515,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -7868,11 +10532,8 @@ STAGE PLANS: Map-reduce partition columns: (2 * _col0) (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] - keyExpressions: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -7885,27 +10546,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -7913,7 +10561,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -7946,10 +10593,10 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -7976,53 +10623,62 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 
2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 - Left Semi Join 1 to 2 keys: 0 key (type: int) 1 key (type: int) - 2 _col0 (type: int) Map Join Vectorization: bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 3 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 2:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2, 3] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: 
string) + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8034,12 +10690,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [string] Map 3 Map Operator Tree: TableScan @@ -8047,12 +10697,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -8061,10 +10709,8 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Execution mode: vectorized, llap @@ -8078,12 +10724,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -8091,12 +10731,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -8105,16 +10743,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -8125,10 +10760,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink 
Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -8141,27 +10774,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 4 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, VALUE._col0:int, VALUE._col1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) @@ -8169,14 +10789,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -8214,10 +10833,10 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -8244,12 +10863,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string)) predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -8260,7 +10877,6 @@ STAGE PLANS: 1 _col0 (type: int), _col1 (type: string) Map Join Vectorization: bigTableKeyExpressions: col 0:int, col 1:string - bigTableValueExpressions: col 0:int, col 1:string className: 
VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -8269,15 +10885,14 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -8290,12 +10905,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -8303,12 +10912,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string)) predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -8317,16 +10924,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int, col 1:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -8337,10 +10941,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -8353,27 +10955,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -8381,7 +10970,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -8410,25 +10998,273 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -10 val_10 -2 val_2 -4 val_4 -5 val_5 -5 val_5 -5 val_5 -8 val_8 -9 val_9 -PREHOOK: query: explain vectorization detail -select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +10 val_10 +2 val_2 +4 val_4 +5 val_5 +5 val_5 +5 val_5 +8 val_8 +9 val_9 +PREHOOK: query: explain vectorization operator +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS 
true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select 
Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +PREHOOK: query: explain vectorization operator +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization operator +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by 
a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -8454,44 +11290,54 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) - predicate: key is not null (type: boolean) - Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Semi Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 3 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 26 Data size: 105 Basic stats: 
COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8503,12 +11349,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -8516,45 +11356,15 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) - predicate: key is not null (type: boolean) + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8563,15 +11373,9 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] 
Map 4 Map Operator Tree: TableScan @@ -8579,12 +11383,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -8593,16 +11395,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -8613,10 +11412,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -8629,27 +11426,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -8657,14 +11441,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -8676,13 +11459,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: 
default@t3 #### A masked pattern was here #### -POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -8694,6 +11477,18 @@ POSTHOOK: Input: default@t3 0 0 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 10 10 10 @@ -8702,11 +11497,11 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization detail -select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization operator +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization operator +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -8721,48 +11516,26 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Map Join Operator - condition map: - Left Outer Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8771,34 +11544,25 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8810,51 +11574,45 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 4 + Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS 
true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8866,27 +11624,46 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -8894,14 +11671,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -8913,13 +11689,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -8951,10 +11727,13 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization detail +NULL +NULL +NULL +PREHOOK: query: explain vectorization operator select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -8970,8 +11749,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -8981,18 +11762,46 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 6 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: 
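-- The hunk above swaps the old LEFT OUTER variant for a FULL OUTER JOIN, and the
-- plan now runs it as a broadcast MapJoin (note the Full Outer Join condition map
-- inside Map 1 and the new BROADCAST_EDGE/Union 2 edges). A minimal repro sketch,
-- assuming the t1/t2/t3 test tables this q-file already uses:
EXPLAIN VECTORIZATION OPERATOR
SELECT a.key
FROM t1 a
FULL OUTER JOIN t3 b ON a.key = b.key
LEFT SEMI JOIN t2 c ON b.key = c.key
SORT BY a.key;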
true + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -9001,33 +11810,32 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 4 + Map 6 Map Operator Tree: TableScan alias: b Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -9040,51 +11848,45 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 5 + Map 7 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - 
outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -9096,44 +11898,32 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reducer 2 + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 - Left Semi Join 1 to 2 + Left Semi Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) + 0 _col5 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - 
reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -9141,18 +11931,68 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 6 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Union 2 + Vertex: Union 2 Stage: Stage-0 Fetch Operator @@ -9201,10 +12041,10 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: 
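-- Reducer 5 above carries FullOuterIntersect: true and feeds Union 2 alongside
-- Map 1, which suggests the FULL OUTER MapJoin result is assembled from two
-- unioned passes. A hedged reading of that shape in plain SQL (a logical
-- equivalence, not necessarily the operator-level mechanism):
SELECT a.key, b.key FROM t1 a LEFT OUTER JOIN t3 b ON a.key = b.key
UNION ALL
SELECT a.key, b.key FROM t1 a RIGHT OUTER JOIN t3 b ON a.key = b.key
WHERE a.key IS NULL; -- keeps only the right-side rows that found no match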
query: explain vectorization operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -9220,8 +12060,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -9231,18 +12071,54 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Execution mode: 
vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -9251,54 +12127,48 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 4 + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -9310,30 +12180,21 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 5 + Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + 
Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -9346,44 +12207,14 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Left Outer Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -9391,14 +12222,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -9451,11 +12281,11 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization detail -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization operator +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization operator +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -9470,7 +12300,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 
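-- The enable/disable conditions printed in these plans pin down the session
-- settings in effect for this block of the test; the values below are read off
-- the nativeConditionsMet/nativeConditionsNotMet lines above (observed state,
-- not a recommendation):
SET hive.execution.engine=tez;
SET hive.mapjoin.optimized.hashtable=true;
SET hive.vectorized.execution.mapjoin.native.enabled=false; -- hence className: VectorMapJoinOperator with native: false
SET hive.vectorized.execution.reducesink.new.enabled=true;
SET hive.vectorized.execution.reduce.enabled=true;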
(SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -9481,18 +12312,38 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -9501,15 +12352,9 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -9517,38 +12362,38 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group 
By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -9560,12 +12405,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan @@ -9573,17 +12412,14 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -9596,44 +12432,32 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 - Right Outer Join 0 to 2 + Full Outer Join 0 to 1 keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) + 0 _col0 (type: int) + 1 key (type: int) 
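-- The build side of each LEFT SEMI JOIN above is first reduced to distinct keys
-- (the Group By Operator with groupByMode: HASH on the small table), which
-- matches semi-join semantics:
SELECT a.key FROM t3 a LEFT SEMI JOIN t1 b ON a.key = b.key;
-- behaves like
SELECT a.key FROM t3 a WHERE a.key IN (SELECT DISTINCT key FROM t1);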
                outputColumnNames: _col0
-                Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: int)
                  sort order: +
-                  Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
+                MergeJoin Vectorization:
+                    enabled: false
+                    enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
        Reducer 3
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: a
-                reduceColumnSortOrder: +
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 1
-                    dataColumns: KEY.reducesinkkey0:int
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int)
@@ -9641,14 +12465,13 @@ STAGE PLANS:
                Select Vectorization:
                    className: VectorSelectOperator
                    native: true
-                    projectedOutputColumnNums: [0]
-                Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
                  File Sink Vectorization:
                      className: VectorFileSinkOperator
                      native: false
-                  Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -9660,13 +12483,13 @@ STAGE PLANS:
      Processor Tree:
        ListSink
-PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Input: default@t2
PREHOOK: Input: default@t3
#### A masked pattern was here ####
-POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
+POSTHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
@@ -9694,19 +12517,30 @@ POSTHOOK: Input: default@t3
10
10
10
+10
+10
+10
+10
+10
+10
+10
+10
+2
4
4
+5
+5
+5
8
8
+9
NULL
NULL
NULL
-NULL
-NULL
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain vectorization operator
select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain vectorization operator
select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
@@ -9722,8 +12556,9 @@ STAGE PLANS:
      Tez
#### A masked pattern was here ####
      Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Map 1 <- Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS)
+        Reducer 3 <- Union 2 (SIMPLE_EDGE)
+        Reducer 4 <- Map 1
(SIMPLE_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -9733,18 +12568,61 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 6 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: 
vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -9753,54 +12631,48 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 4 + Map 5 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -9812,30 +12684,29 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 5 + Map 6 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + 
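-- Two Reduce Sink flavors recur in these vertices: keyed shuffles that declare
-- Map-reduce partition columns vectorize as VectorReduceSinkLongOperator (long
-- join keys), while the final SORT BY shuffle, which has a key but no partition
-- columns, falls back to the generic VectorReduceSinkObjectHashOperator.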
Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -9848,44 +12719,14 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Outer Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -9893,18 +12734,60 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: 
VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 6 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Union 2 + Vertex: Union 2 Stage: Stage-0 Fetch Operator @@ -9966,10 +12849,10 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -9996,12 +12879,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 21 Data size: 3948 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -10012,7 +12893,6 @@ STAGE PLANS: 1 _col0 (type: int) Map Join Vectorization: bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -10021,6 +12901,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 23 Data size: 4342 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Map Join Operator condition map: Left Outer Join 0 to 1 @@ -10029,7 +12910,6 @@ STAGE PLANS: 1 value (type: string) Map Join Vectorization: bigTableKeyExpressions: col 1:string - bigTableValueExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has 
keys IS true, Optimized Table and Supports Key Types IS true @@ -10038,15 +12918,14 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int) sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -10059,12 +12938,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -10072,12 +12945,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -10086,16 +12957,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -10106,10 +12974,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -10122,12 +12988,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -10135,17 +12995,14 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: value (type: string) sort order: + Map-reduce partition columns: value (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN 
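-- The join key's type picks the specialized sink: the a.key = c.key plans above
-- shuffle through VectorReduceSinkLongOperator, while this a.value = c.value
-- variant keys on a string (bigTableKeyExpressions: col 1:string) and uses
-- VectorReduceSinkStringOperator. Sketch of the string-keyed query under test:
EXPLAIN VECTORIZATION OPERATOR
SELECT a.key
FROM t3 a
LEFT SEMI JOIN t2 b ON a.key = b.key
LEFT OUTER JOIN t1 c ON a.value = c.value
SORT BY a.key;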
[tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -10158,27 +13015,14 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -10186,7 +13030,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -10252,10 +13095,10 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -10281,12 +13124,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0:int, val 100), SelectColumnIsNotNull(col 1:string)) predicate: ((key > 100) and value is not null) (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -10295,7 +13136,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -10305,7 +13145,6 @@ STAGE PLANS: 1 _col0 (type: string) Map Join Vectorization: bigTableKeyExpressions: col 1:string - bigTableValueExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -10314,6 +13153,7 @@ STAGE PLANS: input vertices: 1 Map 2 Statistics: Num rows: 12 Data size: 2226 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true File Output Operator compressed: false File Sink Vectorization: @@ -10335,12 +13175,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - 
dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -10348,12 +13182,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 1:string) predicate: value is not null (type: boolean) Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -10362,16 +13194,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1] Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 1:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: string) mode: hash outputColumnNames: _col0 @@ -10382,10 +13211,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -10398,12 +13225,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -10421,10 +13242,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -10451,12 +13272,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -10466,13 +13285,9 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -10482,10 +13297,8 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -10498,12 +13311,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -10511,12 +13318,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -10525,16 +13330,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -10545,10 +13347,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -10561,27 +13361,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -10589,7 +13376,6 @@ STAGE PLANS: Select Vectorization: className: 
VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -10624,10 +13410,10 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -10654,12 +13440,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -10669,13 +13453,9 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -10685,10 +13465,8 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -10701,12 +13479,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -10714,12 +13486,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -10728,16 +13498,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: 
col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -10748,10 +13515,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -10764,27 +13529,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -10792,7 +13544,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -10829,10 +13580,10 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -10859,12 +13610,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -10874,13 +13623,9 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -10890,10 +13635,8 @@ STAGE PLANS: sort order: 
++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -10906,12 +13649,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -10919,12 +13656,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -10933,16 +13668,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -10953,10 +13685,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -10969,27 +13699,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -10997,7 +13714,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -11026,10 +13742,10 @@ POSTHOOK: type: QUERY POSTHOOK: 
Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -11056,12 +13772,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -11071,13 +13785,9 @@ STAGE PLANS: 0 key (type: int) 1 _col1 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [1] - bigTableValueColumnNums: [1] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [1] outputColumnNames: _col1 input vertices: 1 Map 3 @@ -11088,17 +13798,14 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1] Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -11111,12 +13818,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -11124,12 +13825,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterLongColLessLongScalar(col 0:int, val 15) predicate: (key < 15) (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -11138,16 +13837,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By 
Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int, col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col1 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -11158,10 +13854,8 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -11174,27 +13868,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -11202,7 +13883,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -11242,10 +13922,10 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -11272,12 +13952,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -11287,13 +13965,9 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] outputColumnNames: _col0, 
_col1 input vertices: 1 Map 3 @@ -11303,10 +13977,8 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -11319,12 +13991,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -11332,12 +13998,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterStringGroupColLessStringScalar(col 1:string, val val_10), SelectColumnIsNotNull(col 0:int)) predicate: ((value < 'val_10') and key is not null) (type: boolean) Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -11346,16 +14010,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int, col 1:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -11366,10 +14027,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -11382,27 +14041,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -11410,7 +14056,6 @@ STAGE PLANS: Select Vectorization: 
className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -11442,10 +14087,10 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -11472,12 +14117,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 5) predicate: (key > 5) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -11486,16 +14129,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -11506,10 +14146,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -11522,12 +14160,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -11535,12 +14167,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -11550,13 +14180,9 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [1] - bigTableValueColumnNums: [1] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [1] outputColumnNames: _col1 input vertices: 1 Map 1 @@ -11567,17 +14193,14 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -11590,27 +14213,14 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -11618,7 +14228,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -11650,10 +14259,10 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -11680,12 +14289,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0:int, val 5), FilterStringGroupColLessEqualStringScalar(col 1:string, val val_20)) predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -11694,16 +14301,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE 
Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int, col 1:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -11714,10 +14318,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -11730,12 +14332,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -11743,12 +14339,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -11758,13 +14352,9 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [1] - bigTableValueColumnNums: [1] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [1] outputColumnNames: _col1 input vertices: 1 Map 1 @@ -11775,17 +14365,14 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -11798,27 +14385,14 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -11826,7 +14400,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -11855,10 +14428,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -11885,12 +14458,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 2) predicate: (key > 2) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -11899,16 +14470,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -11919,10 +14487,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -11935,12 +14501,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -11948,12 +14508,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: 
SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -11963,13 +14521,9 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 1 @@ -11979,10 +14533,8 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -11995,27 +14547,14 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -12023,7 +14562,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -12057,10 +14595,10 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -12087,12 +14625,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ 
-12102,13 +14638,9 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] outputColumnNames: _col0 input vertices: 1 Map 3 @@ -12118,10 +14650,8 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -12134,12 +14664,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -12147,12 +14671,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -12161,16 +14683,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -12181,10 +14700,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -12197,27 +14714,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - 
reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -12225,7 +14729,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -12273,10 +14776,10 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -12303,12 +14806,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -12318,13 +14819,9 @@ STAGE PLANS: 0 key (type: int) 1 (2 * _col0) (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -12334,10 +14831,8 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -12350,12 +14845,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -12363,12 +14852,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 3:int)(children: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 3:int) predicate: (2 * key) is not null (type: 
boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -12377,16 +14864,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -12397,11 +14881,8 @@ STAGE PLANS: Map-reduce partition columns: (2 * _col0) (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] - keyExpressions: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -12414,27 +14895,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -12442,7 +14910,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -12475,10 +14942,10 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -12505,53 +14972,56 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 - Left Semi Join 1 to 2 keys: 0 key (type: int) 1 key (type: int) - 2 _col0 (type: int) Map 
Join Vectorization: - bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int, col 1:string - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 3 - 2 Map 4 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2, 3] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 23 Data size: 92 Basic stats: 
COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -12560,15 +15030,9 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [string] Map 3 Map Operator Tree: TableScan @@ -12576,12 +15040,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -12590,10 +15052,8 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Execution mode: vectorized, llap @@ -12607,12 +15067,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -12620,12 +15074,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -12634,16 +15086,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -12654,10 +15103,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE 
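-- The hunks in this region switch the q-file from "explain vectorization detail"
-- to "explain vectorization operator", which is why every detail-only field
-- (vectorizationSchemaColumns, keyColumnNums, valueColumnNums,
-- projectedOutputColumnNums, predicateExpression, rowBatchContext, ...) drops
-- out of the golden output while the operator-level fields remain. A minimal
-- sketch of the two EXPLAIN levels, using this q-file's t1/t2/t3 tables:
set hive.vectorized.execution.enabled=true;
explain vectorization operator   -- operator classes + native conditions only
select * from t1 a join t2 b on a.key = b.key
    left semi join t3 c on b.key = c.key sort by a.key, a.value;
explain vectorization detail     -- additionally prints column mappings and row-batch context
select * from t1 a join t2 b on a.key = b.key
    left semi join t3 c on b.key = c.key sort by a.key, a.value;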
Execution mode: vectorized, llap LLAP IO: all inputs @@ -12670,27 +15117,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 4 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, VALUE._col0:int, VALUE._col1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) @@ -12698,14 +15132,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -12743,10 +15176,10 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -12773,12 +15206,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string)) predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -12788,13 +15219,9 @@ STAGE PLANS: 0 key (type: int), value (type: string) 1 _col0 (type: int), _col1 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [0, 1] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] className: VectorMapJoinLeftSemiMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] 
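-- The plan above vectorizes the composite-key semi join with
-- VectorMapJoinLeftSemiMultiKeyOperator; a single int/bigint key would map to
-- the specialized VectorMapJoinLeftSemiLongOperator seen elsewhere in this
-- file. A sketch of the two shapes, same test tables:
explain vectorization operator
select * from t3 a left semi join t1 b on a.key = b.key;    -- Long-key variant
explain vectorization operator
select * from t3 a left semi join t1 b
    on a.key = b.key and a.value = b.value;                 -- MultiKey variant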
outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -12804,10 +15231,8 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -12820,12 +15245,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -12833,12 +15252,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string)) predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -12847,16 +15264,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int, col 1:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -12866,12 +15280,250 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: 
Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +10 val_10 +2 val_2 +4 val_4 +5 val_5 +5 val_5 +5 val_5 +8 val_8 +9 val_9 +PREHOOK: query: explain vectorization operator +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + 
input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: 
false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -12883,42 +15535,28 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -12930,35 +15568,37 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -10 val_10 -2 val_2 -4 val_4 -5 val_5 -5 val_5 -5 val_5 -8 
val_8 -9 val_9 -PREHOOK: query: explain vectorization detail -select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +PREHOOK: query: explain vectorization operator +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization operator +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -12984,44 +15624,48 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) - predicate: key is not null (type: boolean) - Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Semi Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 3 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -13030,15 +15674,9 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -13046,45 +15684,15 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) - predicate: key is not null (type: boolean) + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -13093,15 +15701,9 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -13109,12 +15711,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -13123,16 +15723,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -13143,10 +15740,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -13159,27 +15754,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -13187,14 +15769,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 46 
Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -13206,13 +15787,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -13224,6 +15805,18 @@ POSTHOOK: Input: default@t3 0 0 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 10 10 10 @@ -13232,11 +15825,11 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization detail -select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization operator +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization operator +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -13251,48 +15844,26 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Map Join Operator - condition map: - Left Outer Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort 
order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -13301,34 +15872,25 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -13340,51 +15902,45 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 4 + Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - 
keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -13396,27 +15952,45 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -13424,14 
+15998,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -13443,13 +16016,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -13481,10 +16054,13 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization detail +NULL +NULL +NULL +PREHOOK: query: explain vectorization operator select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -13500,8 +16076,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -13511,18 +16089,43 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE 
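-- In the new golden output above, the FULL OUTER + LEFT SEMI query compiles to
-- a native VectorMapJoinFullOuterLongOperator feeding Union 2, with a second
-- branch using VectorMapJoinFullOuterIntersectLongOperator for the matched
-- keys, where the old output fell back to a non-vectorizable Merge Join
-- reducer. A sketch of exercising both paths, assuming this patch's
-- hive.mapjoin.full.outer flag gates the native path:
set hive.auto.convert.join=true;
set hive.mapjoin.full.outer=true;    -- native FULL OUTER MapJoin path
explain vectorization operator
select a.key from t1 a full outer join t3 b on a.key = b.key
    left semi join t2 c on b.key = c.key sort by a.key;
set hive.mapjoin.full.outer=false;   -- reverts to the Merge Join reducer plan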
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 6 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -13534,30 +16137,29 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 4 + Map 6 Map Operator Tree: TableScan alias: b Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -13570,51 +16172,45 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 
2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 5 + Map 7 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -13626,44 +16222,32 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reducer 2 + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 - Left Semi Join 1 to 2 + Left Semi Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) + 0 _col5 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Statistics: 
Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -13671,18 +16255,65 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterIntersectLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 6 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Union 2 + Vertex: Union 2 Stage: Stage-0 Fetch Operator @@ -13731,10 
+16362,10 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -13750,8 +16381,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -13761,18 +16392,48 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns 
IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -13784,51 +16445,45 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 4 + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -13840,30 +16495,21 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 5 + Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key 
expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -13871,49 +16517,19 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Left Outer Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -13921,14 +16537,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -13981,11 +16596,11 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization detail -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization operator +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key 
+POSTHOOK: query: explain vectorization operator +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -14000,7 +16615,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -14011,18 +16627,35 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -14034,12 +16667,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -14047,38 +16674,38 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: 
VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -14090,12 +16717,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan @@ -14103,17 +16724,14 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -14126,44 +16744,32 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: Merge 
Join Operator condition map: - Left Semi Join 0 to 1 - Right Outer Join 0 to 2 + Full Outer Join 0 to 1 keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) + 0 _col0 (type: int) + 1 key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -14171,14 +16777,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -14190,13 +16795,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -14224,19 +16829,30 @@ POSTHOOK: Input: default@t3 10 10 10 +10 +10 +10 +10 +10 +10 +10 +10 +2 4 4 +5 +5 +5 8 8 +9 NULL NULL NULL -NULL -NULL -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -14252,8 +16868,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), 
Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -14263,18 +16880,55 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 6 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys 
IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -14286,51 +16940,45 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 4 + Map 5 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -14342,30 +16990,29 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 5 + Map 6 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: 
VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -14378,44 +17025,14 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Outer Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -14423,18 +17040,57 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 
+ keys: + 0 _col0 (type: int) + 1 key (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterIntersectLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 6 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Union 2 + Vertex: Union 2 Stage: Stage-0 Fetch Operator @@ -14496,10 +17152,10 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -14526,12 +17182,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 21 Data size: 3948 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -14541,13 +17195,9 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -14559,13 +17209,9 @@ STAGE PLANS: 0 _col1 (type: string) 1 value (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] className: VectorMapJoinOuterStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - 
projectedOutputColumnNums: [0] outputColumnNames: _col0 input vertices: 1 Map 4 @@ -14575,10 +17221,8 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -14591,12 +17235,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -14604,12 +17242,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -14618,16 +17254,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -14638,10 +17271,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -14654,12 +17285,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -14667,17 +17292,14 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: value (type: string) sort order: + Map-reduce partition columns: value (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] 
Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -14690,27 +17312,14 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -14718,7 +17327,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -14784,10 +17392,10 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -14813,12 +17421,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0:int, val 100), SelectColumnIsNotNull(col 1:string)) predicate: ((key > 100) and value is not null) (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -14827,7 +17433,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -14836,13 +17441,9 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] className: VectorMapJoinLeftSemiStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] outputColumnNames: _col0 input vertices: 1 Map 2 @@ -14868,12 +17469,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -14881,12 +17476,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2024 Basic 
stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 1:string) predicate: value is not null (type: boolean) Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -14895,16 +17488,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1] Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 1:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: string) mode: hash outputColumnNames: _col0 @@ -14915,10 +17505,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -14931,12 +17519,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -14954,10 +17536,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -14984,12 +17566,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -14999,26 +17579,21 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 Statistics: Num rows: 12 Data size: 
2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -15031,12 +17606,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -15044,12 +17613,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -15058,16 +17625,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -15078,10 +17642,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -15094,27 +17656,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -15122,7 +17671,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] 
Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -15157,10 +17705,10 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -15187,12 +17735,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -15202,26 +17748,21 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -15234,12 +17775,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -15247,12 +17782,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -15261,16 +17794,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By 
Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -15281,10 +17811,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -15297,27 +17825,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -15325,7 +17840,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -15362,10 +17876,10 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -15392,12 +17906,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -15407,26 +17919,21 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] outputColumnNames: 
_col0, _col1 input vertices: 1 Map 3 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -15439,12 +17946,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -15452,12 +17953,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -15466,16 +17965,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -15486,10 +17982,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -15502,27 +17996,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -15530,7 +18011,6 @@ STAGE PLANS: Select Vectorization: className: 
VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -15559,10 +18039,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -15589,12 +18069,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -15604,34 +18082,28 @@ STAGE PLANS: 0 key (type: int) 1 _col1 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [1] - bigTableValueColumnNums: [1] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [1] outputColumnNames: _col1 input vertices: 1 Map 3 Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1] Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -15644,12 +18116,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -15657,12 +18123,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterLongColLessLongScalar(col 0:int, 
val 15) predicate: (key < 15) (type: boolean) Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -15671,16 +18135,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int, col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col1 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -15691,10 +18152,8 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -15707,27 +18166,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -15735,7 +18181,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -15775,10 +18220,10 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -15805,12 +18250,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -15820,26 +18263,21 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] className: 
VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -15852,12 +18290,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -15865,12 +18297,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterStringGroupColLessStringScalar(col 1:string, val val_10), SelectColumnIsNotNull(col 0:int)) predicate: ((value < 'val_10') and key is not null) (type: boolean) Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -15879,16 +18309,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int, col 1:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -15899,10 +18326,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -15915,27 +18340,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -15943,7 +18355,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -15975,10 +18386,10 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -16005,12 +18416,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 5) predicate: (key > 5) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -16019,16 +18428,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -16039,10 +18445,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -16055,12 +18459,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -16068,12 +18466,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: 
SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -16083,34 +18479,28 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [1] - bigTableValueColumnNums: [1] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [1] outputColumnNames: _col1 input vertices: 1 Map 1 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -16123,27 +18513,14 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -16151,7 +18528,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -16183,10 +18559,10 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -16213,12 +18589,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: 
NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0:int, val 5), FilterStringGroupColLessEqualStringScalar(col 1:string, val val_20)) predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -16227,16 +18601,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int, col 1:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -16247,10 +18618,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -16263,12 +18632,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -16276,12 +18639,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -16291,34 +18652,28 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [1] - bigTableValueColumnNums: [1] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [1] outputColumnNames: _col1 input vertices: 1 Map 1 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Reduce Sink 
Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -16331,27 +18686,14 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) @@ -16359,7 +18701,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -16388,10 +18729,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -16418,12 +18759,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 2) predicate: (key > 2) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -16432,16 +18771,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -16452,10 +18788,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -16468,12 +18802,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -16481,12 +18809,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -16496,26 +18822,21 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 1 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -16528,27 +18849,14 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -16556,7 +18864,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -16590,10 +18897,10 @@ 
POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -16620,12 +18927,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -16635,26 +18940,21 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] outputColumnNames: _col0 input vertices: 1 Map 3 Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int) sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -16667,12 +18967,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -16680,12 +18974,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -16694,16 +18986,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: 
_col0 (type: int) mode: hash outputColumnNames: _col0 @@ -16714,10 +19003,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -16730,27 +19017,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -16758,7 +19032,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -16806,10 +19079,10 @@ POSTHOOK: Input: default@t3 8 8 9 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -16836,12 +19109,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -16851,26 +19122,21 @@ STAGE PLANS: 0 key (type: int) 1 (2 * _col0) (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Reduce 
Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -16883,12 +19149,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -16896,12 +19156,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 3:int)(children: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 3:int) predicate: (2 * key) is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -16910,16 +19168,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -16930,11 +19185,8 @@ STAGE PLANS: Map-reduce partition columns: (2 * _col0) (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] - keyExpressions: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -16947,27 +19199,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) @@ -16975,7 +19214,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 
12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -17008,10 +19246,10 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 8 val_8 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -17038,53 +19276,58 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 - Left Semi Join 1 to 2 keys: 0 key (type: int) 1 key (type: int) - 2 _col0 (type: int) Map Join Vectorization: - bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int, col 1:string - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 3 - 2 Map 4 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Statistics: 
Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2, 3] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17093,15 +19336,9 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [string] Map 3 Map Operator Tree: TableScan @@ -17109,12 +19346,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -17123,10 +19358,8 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Execution mode: vectorized, llap @@ -17140,12 +19373,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -17153,12 +19380,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: 
[0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -17167,16 +19392,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -17187,10 +19409,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -17203,27 +19423,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 4 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, VALUE._col0:int, VALUE._col1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) @@ -17231,14 +19438,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -17276,10 +19482,10 @@ POSTHOOK: Input: default@t3 10 val_10 10 val_5 4 val_4 4 val_2 8 val_8 8 val_4 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select * from t3 a left semi join t1 b on a.key = b.key and 
a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -17306,12 +19512,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string)) predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -17321,26 +19525,21 @@ STAGE PLANS: 0 key (type: int), value (type: string) 1 _col0 (type: int), _col1 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [0, 1] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] className: VectorMapJoinLeftSemiMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -17353,12 +19552,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -17366,12 +19559,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string)) predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -17380,16 +19571,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int, col 1:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -17400,10 
+19588,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -17416,42 +19602,270 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +10 val_10 +2 val_2 +4 val_4 +5 val_5 +5 val_5 +5 val_5 +8 val_8 +9 val_9 +PREHOOK: query: explain vectorization operator +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 
<- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + 
outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: 
false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -17463,35 +19877,37 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -10 val_10 -2 val_2 -4 val_4 -5 val_5 -5 val_5 -5 val_5 -8 val_8 -9 val_9 -PREHOOK: query: explain vectorization detail -select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +PREHOOK: query: explain vectorization operator +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization operator +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -17517,44 +19933,50 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) - predicate: key is not null (type: boolean) - Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Semi Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false - outputColumnNames: _col0 - 
input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 3 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17563,15 +19985,9 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -17579,45 +19995,15 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: 
VectorReduceSinkLongOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) - predicate: key is not null (type: boolean) + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17626,15 +20012,9 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -17642,12 +20022,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -17656,16 +20034,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -17676,10 +20051,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -17692,27 +20065,14 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -17720,14 +20080,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -17739,13 +20098,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -17757,6 +20116,18 @@ POSTHOOK: Input: default@t3 0 0 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 10 10 10 @@ -17765,11 +20136,11 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization detail -select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: explain vectorization operator +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization operator +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -17784,48 +20155,26 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 
<- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Map Join Operator - condition map: - Left Outer Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17834,34 +20183,25 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17873,51 +20213,45 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 4 + Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17929,27 +20263,46 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 + Execution mode: 
llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -17957,14 +20310,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -17976,13 +20328,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -18014,10 +20366,13 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization detail +NULL +NULL +NULL +PREHOOK: query: explain vectorization operator select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN 
VECTORIZATION: @@ -18033,8 +20388,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -18044,18 +20401,44 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 6 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18067,30 +20450,29 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - 
scratchColumnTypeNames: [] - Map 4 + Map 6 Map Operator Tree: TableScan alias: b Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -18103,51 +20485,45 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 5 + Map 7 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column 
stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18159,44 +20535,32 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reducer 2 + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 - Left Semi Join 1 to 2 + Left Semi Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) + 0 _col5 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -18204,18 +20568,66 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer 
Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterIntersectLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 6 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Union 2 + Vertex: Union 2 Stage: Stage-0 Fetch Operator @@ -18264,10 +20676,10 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -18283,8 +20695,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -18294,18 +20706,50 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key 
(type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18317,51 +20761,45 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 4 + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18373,80 +20811,41 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 5 + Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Left Outer Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 Execution mode: vectorized, llap 
Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -18454,14 +20853,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -18514,11 +20912,11 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization detail -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization operator +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization operator +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -18533,7 +20931,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -18544,18 +20943,36 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + 
nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18567,12 +20984,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -18580,38 +20991,38 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18623,12 +21034,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan @@ -18636,17 +21041,14 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -18659,44 +21061,32 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 - Right Outer Join 0 to 2 + Full Outer Join 0 to 1 keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) + 0 _col0 (type: int) + 1 key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -18704,14 +21094,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -18723,13 +21112,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -18757,19 +21146,30 @@ POSTHOOK: Input: default@t3 10 10 10 +10 +10 +10 +10 +10 +10 +10 +10 +2 4 4 +5 +5 +5 8 8 +9 NULL NULL NULL -NULL -NULL -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -18785,8 +21185,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -18796,18 +21197,57 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No 
nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 6 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18819,51 +21259,45 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 4 + Map 5 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: 
true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18875,30 +21309,29 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 5 + Map 6 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -18911,44 +21344,14 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Outer Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column 
stats: NONE Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -18956,18 +21359,58 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + Map Join Vectorization: + className: VectorMapJoinFullOuterIntersectLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0 + input vertices: + 1 Map 6 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + FullOuterIntersect: true + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Union 2 + Vertex: Union 2 Stage: Stage-0 Fetch Operator @@ -19029,10 +21472,10 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.key from t3 a left semi join t2 b on a.key = b.key 
left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -19059,12 +21502,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 21 Data size: 3948 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -19074,17 +21515,14 @@ STAGE PLANS: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] outputColumnNames: _col0, _col1 input vertices: 1 Map 3 Statistics: Num rows: 23 Data size: 4342 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Map Join Operator condition map: Left Outer Join 0 to 1 @@ -19092,26 +21530,21 @@ STAGE PLANS: 0 _col1 (type: string) 1 value (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] className: VectorMapJoinOuterStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] outputColumnNames: _col0 input vertices: 1 Map 4 Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true Reduce Output Operator key expressions: _col0 (type: int) sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -19124,12 +21557,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan @@ -19137,12 +21564,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 
Basic stats: COMPLETE Column stats: NONE Select Operator @@ -19151,16 +21576,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -19171,10 +21593,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -19187,12 +21607,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -19200,17 +21614,14 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: value (type: string) sort order: + Map-reduce partition columns: value (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -19223,27 +21634,14 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -19251,7 +21649,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -19317,10 +21714,10 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization operator select a.key from t3 a left semi join t2 b on a.value = b.value where 
a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization operator select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -19346,12 +21743,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0:int, val 100), SelectColumnIsNotNull(col 1:string)) predicate: ((key > 100) and value is not null) (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -19360,7 +21755,6 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -19369,17 +21763,14 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] className: VectorMapJoinLeftSemiStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] outputColumnNames: _col0 input vertices: 1 Map 2 Statistics: Num rows: 12 Data size: 2226 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true File Output Operator compressed: false File Sink Vectorization: @@ -19401,12 +21792,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -19414,12 +21799,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 1:string) predicate: value is not null (type: boolean) Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -19428,16 +21811,13 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1] Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 1:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [] keys: _col0 (type: string) mode: hash outputColumnNames: _col0 @@ -19448,10 +21828,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS 
true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -19464,12 +21842,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [1] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_like_2.q.out ql/src/test/results/clientpositive/llap/vector_like_2.q.out index 8e132a7..1db8164 100644 --- ql/src/test/results/clientpositive/llap/vector_like_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_like_2.q.out @@ -63,10 +63,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:boolean Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out index d5d8e53..ec65f98 100644 --- ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out @@ -166,11 +166,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [1] + partitionColumns: 0:int + valueColumns: 1:string Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -222,14 +222,16 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [1] + bigTableValueColumns: 1:string className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 4, 1] - smallTableMapping: [4] + nonOuterSmallTableKeyMapping: [0] + projectedOutput: 0:int, 4:string, 1:string + smallTableValueMapping: 4:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 @@ -247,10 +249,9 @@ STAGE PLANS: sort order: +++ Reduce Sink Vectorization: className: 
VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 4, 1] + keyColumns: 0:int, 4:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out index 37821fb..8e153a4 100644 --- ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out @@ -203,6 +203,7 @@ STAGE PLANS: className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col3 input vertices: 1 Map 3 @@ -217,6 +218,7 @@ STAGE PLANS: className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col3 input vertices: 1 Map 4 @@ -477,6 +479,7 @@ STAGE PLANS: className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col3, _col4 input vertices: 1 Map 3 @@ -491,6 +494,7 @@ STAGE PLANS: className: VectorMapJoinLeftSemiMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col3 input vertices: 1 Map 4 diff --git ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out index 7c1cbb6..4e8a174 100644 --- ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out +++ ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out @@ -351,6 +351,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -375,6 +378,9 @@ STAGE 
PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out index 9801470..c93555f 100644 --- ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out @@ -840,26 +840,141 @@ NULL 35 NULL NULL NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL NULL -PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez #### A masked pattern was here #### -10 NULL NULL 10 -100 100 100 100 -NULL 10 10 NULL -NULL 10 48 NULL -NULL 10 NULL NULL -NULL 35 10 NULL -NULL 35 48 NULL -NULL 35 NULL NULL -NULL NULL 10 NULL -NULL NULL 48 NULL -NULL NULL NULL 35 -NULL NULL NULL NULL + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:int + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + 
vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [null, [0, 0]] + valueContexts: [0:[types [int], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[value]] + keys: + 0 key (type: int) + 1 value (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + bigTableValueExpressions: col 0:int, col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, No nullsafe IS false + nullSafes: [true] + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 1 + Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key<=>b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -1733,26 +1848,141 @@ NULL 35 NULL NULL NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL NULL -PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root 
stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez #### A masked pattern was here #### -10 NULL NULL 10 -100 100 100 100 -NULL 10 10 NULL -NULL 10 48 NULL -NULL 10 NULL NULL -NULL 35 10 NULL -NULL 35 48 NULL -NULL 35 NULL NULL -NULL NULL 10 NULL -NULL NULL 48 NULL -NULL NULL NULL 35 -NULL NULL NULL NULL + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:int + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [null, [0, 0]] + valueContexts: [0:[types [int], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[value]] + keys: + 0 key (type: int) + 1 value (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + bigTableValueExpressions: col 0:int, col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: No nullsafe IS false + nullSafes: [true] + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 1 + Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: 
[0, 1, 2, 3] + Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key<=>b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git ql/src/test/results/clientpositive/llap/vector_order_null.q.out ql/src/test/results/clientpositive/llap/vector_order_null.q.out index 58566df..fe2eafe 100644 --- ql/src/test/results/clientpositive/llap/vector_order_null.q.out +++ ql/src/test/results/clientpositive/llap/vector_order_null.q.out @@ -116,10 +116,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -239,10 +238,9 @@ STAGE PLANS: sort order: -+ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -362,10 +360,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -485,10 +482,9 @@ STAGE PLANS: sort order: -+ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:int native: true 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -608,10 +604,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -731,10 +726,9 @@ STAGE PLANS: sort order: -+ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -854,10 +848,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -977,10 +970,9 @@ STAGE PLANS: sort order: -+ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1100,10 +1092,9 @@ STAGE PLANS: sort order: +- Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1223,10 +1214,9 @@ STAGE PLANS: sort order: -- Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: 
[1, 0] + keyColumns: 1:string, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1346,10 +1336,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs diff --git ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out index 50e6a85..71a97c3 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out @@ -107,15 +107,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableOuterKeyMapping: 1 -> 3 - bigTableRetainedColumnNums: [0, 1, 3] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:int className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 3, 4] - smallTableMapping: [4] + outerSmallTableKeyMapping: 1 -> 3 + projectedOutput: 0:string, 1:int, 3:int, 4:string + smallTableValueMapping: 4:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 @@ -169,10 +170,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:string Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -259,10 +260,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] + keyColumns: 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + 
valueColumns: 0:string Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: vectorized, llap @@ -305,15 +306,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableOuterKeyMapping: 0 -> 4 - bigTableRetainedColumnNums: [0, 1, 4] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3, 4, 0, 1] - smallTableMapping: [3] + outerSmallTableKeyMapping: 0 -> 4 + projectedOutput: 3:string, 4:int, 0:int, 1:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 diff --git ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out index 4901e83..5c91324 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out @@ -267,15 +267,16 @@ STAGE PLANS: 0 _col2 (type: int) 1 _col2 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [2] - bigTableOuterKeyMapping: 2 -> 15 - bigTableRetainedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15] - bigTableValueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + bigTableKeyColumns: 2:int + bigTableRetainColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + bigTableValueColumns: 0:tinyint, 1:smallint, 2:int, 3:bigint, 4:float, 5:double, 6:string, 7:string, 8:timestamp, 9:timestamp, 10:boolean, 11:boolean className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] - smallTableMapping: [13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24] + outerSmallTableKeyMapping: 2 -> 15 + projectedOutput: 0:tinyint, 1:smallint, 2:int, 3:bigint, 4:float, 5:double, 6:string, 7:string, 8:timestamp, 9:timestamp, 10:boolean, 11:boolean, 13:tinyint, 14:smallint, 15:int, 16:bigint, 17:float, 18:double, 19:string, 20:string, 21:timestamp, 22:timestamp, 23:boolean, 24:boolean + smallTableValueMapping: 13:tinyint, 14:smallint, 16:bigint, 17:float, 18:double, 19:string, 20:string, 21:timestamp, 22:timestamp, 23:boolean, 24:boolean + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 input vertices: 1 Map 2 @@ -329,10 +330,10 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + 
keyColumns: 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11] + valueColumns: 0:tinyint, 1:smallint, 3:bigint, 4:float, 5:double, 6:string, 7:string, 8:timestamp, 9:timestamp, 10:boolean, 11:boolean Statistics: Num rows: 15 Data size: 3697 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: vectorized, llap @@ -443,13 +444,14 @@ STAGE PLANS: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 0:tinyint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:tinyint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + projectedOutput: 0:tinyint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 2 @@ -503,10 +505,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -709,13 +710,14 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [2] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 2:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:tinyint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + projectedOutput: 0:tinyint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 3 @@ -727,13 +729,14 @@ STAGE PLANS: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 0:tinyint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:tinyint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + projectedOutput: 0:tinyint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 4 @@ -754,10 +757,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap @@ -799,10 +801,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -843,10 +844,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs diff --git ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out index a841d4c..8bf4885 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out @@ -284,13 +284,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [2] - bigTableRetainedColumnNums: [3] - bigTableValueColumnNums: [3] + bigTableKeyColumns: 2:int + bigTableRetainColumnNums: [3] + bigTableValueColumns: 3:bigint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3] + projectedOutput: 3:bigint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 1 Map 3 @@ -302,13 +303,14 @@ STAGE PLANS: 0 _col1 (type: bigint) 1 _col0 (type: 
bigint) Map Join Vectorization: - bigTableKeyColumnNums: [3] - bigTableRetainedColumnNums: [3] - bigTableValueColumnNums: [3] + bigTableKeyColumns: 3:bigint + bigTableRetainColumnNums: [3] + bigTableValueColumns: 3:bigint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3] + projectedOutput: 3:bigint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 1 Map 4 @@ -329,10 +331,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap @@ -374,10 +375,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -418,10 +418,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [3] + keyColumns: 3:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 20 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs diff --git ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out index 839952f..ba37846 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out @@ -296,10 +296,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(25,2) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value 
expressions: _col0 (type: decimal(25,2)) Execution mode: vectorized, llap @@ -354,11 +353,11 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] + keyColumns: 1:int keyExpressions: ConstantVectorExpression(val 0) -> 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(25,2) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(25,2)) Reducer 3 @@ -518,10 +517,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + keyColumns: 0:decimal(15,2), 1:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:decimal(25,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(25,2)) Execution mode: vectorized, llap @@ -578,11 +577,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:decimal(15,2), 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2] + partitionColumns: 1:decimal(15,2) + valueColumns: 2:decimal(25,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(25,2)) Reducer 3 @@ -748,10 +747,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(15,2) Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(15,2)) Execution mode: vectorized, llap @@ -801,10 +800,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE 
Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -847,6 +845,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(25,2)) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -883,11 +884,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:decimal(15,2), 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2] + partitionColumns: 1:decimal(15,2) + valueColumns: 2:decimal(25,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(25,2)) Reducer 4 @@ -1057,10 +1058,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(15,2) Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(15,2)) Execution mode: vectorized, llap @@ -1110,10 +1111,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1156,6 +1156,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(25,2)) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1192,11 +1195,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:decimal(15,2), 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - 
partitionColumnNums: [1] - valueColumnNums: [2] + partitionColumns: 1:decimal(15,2) + valueColumns: 2:decimal(25,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(25,2)) Reducer 4 @@ -1365,10 +1368,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(15,2) Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(15,2)) Execution mode: vectorized, llap @@ -1418,10 +1421,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(15,2) Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(15,2)) Execution mode: vectorized, llap @@ -1465,6 +1468,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)) Statistics: Num rows: 2 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: struct) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Vectorization: @@ -1600,10 +1606,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(17,2) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(17,2)) Execution mode: vectorized, llap @@ -1658,11 +1663,11 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] + keyColumns: 1:int keyExpressions: ConstantVectorExpression(val 0) -> 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(17,2) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(17,2)) Reducer 3 @@ -1822,10 +1827,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) Reduce Sink Vectorization: className: 
VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + keyColumns: 0:decimal(7,2), 1:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:decimal(17,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) Execution mode: vectorized, llap @@ -1882,11 +1887,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:decimal(7,2), 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2] + partitionColumns: 1:decimal(7,2) + valueColumns: 2:decimal(17,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) Reducer 3 @@ -2052,10 +2057,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(7,2) Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(7,2)) Execution mode: vectorized, llap @@ -2105,10 +2110,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -2151,6 +2155,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2187,11 +2194,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:decimal(7,2), 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2] + partitionColumns: 1:decimal(7,2) + valueColumns: 2:decimal(17,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) Reducer 4 @@ -2361,10 +2368,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(7,2) Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(7,2)) Execution mode: vectorized, llap @@ -2414,10 +2421,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -2460,6 +2466,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2496,11 +2505,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:decimal(7,2), 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2] + partitionColumns: 1:decimal(7,2) + valueColumns: 2:decimal(17,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) Reducer 4 @@ -2669,10 +2678,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(7,2) Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(7,2)) Execution mode: vectorized, llap @@ 
-2722,10 +2731,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(7,2) Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(7,2)) Execution mode: vectorized, llap @@ -2769,6 +2778,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)) Statistics: Num rows: 2 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: struct) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out index 8d9ffb8..3654116 100644 --- ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out @@ -98,11 +98,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:struct Statistics: Num rows: 100 Data size: 18816 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: struct) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out index 9aeb650..23f7eca 100644 --- ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out +++ ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out @@ -142,10 +142,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -402,10 +402,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe 
for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -631,10 +631,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -860,11 +860,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -1122,11 +1122,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -1352,11 +1352,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -1582,12 +1582,12 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 1] + keyColumns: 4:int, 1:string keyExpressions: ConstantVectorExpression(val 0) -> 4:int 
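Across all of these golden-file updates, the explain output switches from bare column indices (`keyColumnNums: [4, 1]`, `valueColumnNums: [0, 2]`) to index:type pairs (`keyColumns: 4:int, 1:string`, `valueColumns: 0:string, 2:double`), resolved against the operator's row schema. The sketch below only illustrates that formatting change; the `formatColumns` helper and the plain-string type names are assumptions, not Hive's actual describe code.

```java
import java.util.List;
import java.util.StringJoiner;

// Minimal sketch of the explain-output change in these .q.out diffs:
// old style prints bare column numbers, new style pairs each index with
// the column's type name. Helper names here are hypothetical.
public class ColumnFormatSketch {

  // Old style: "keyColumnNums: [4, 1]"
  static String formatColumnNums(int[] columnNums) {
    StringJoiner sj = new StringJoiner(", ", "[", "]");
    for (int c : columnNums) {
      sj.add(Integer.toString(c));
    }
    return sj.toString();
  }

  // New style: "keyColumns: 4:int, 1:string", resolved against the schema.
  static String formatColumns(int[] columnNums, List<String> columnTypeNames) {
    StringJoiner sj = new StringJoiner(", ");
    for (int c : columnNums) {
      sj.add(c + ":" + columnTypeNames.get(c));
    }
    return sj.toString();
  }

  public static void main(String[] args) {
    List<String> types = List.of("string", "string", "double", "bigint", "int");
    int[] keyCols = {4, 1};
    System.out.println("keyColumnNums: " + formatColumnNums(keyCols)); // [4, 1]
    System.out.println("keyColumns: " + formatColumns(keyCols, types)); // 4:int, 1:string
  }
}
```

The typed form makes the plan self-describing: a reader no longer has to cross-reference scratch-column indices such as the `ConstantVectorExpression(val 0) -> 4:int` key expression above against the table schema to know what is being shuffled.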
native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [0, 2] + partitionColumns: 5:int + valueColumns: 0:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_mfgr (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -1845,12 +1845,12 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 1] + keyColumns: 4:int, 1:string keyExpressions: ConstantVectorExpression(val 0) -> 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [0, 2] + partitionColumns: 5:int + valueColumns: 0:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_mfgr (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -2076,12 +2076,12 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 1] + keyColumns: 4:int, 1:string keyExpressions: ConstantVectorExpression(val 0) -> 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [0, 2] + partitionColumns: 5:int + valueColumns: 0:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_mfgr (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -2301,10 +2301,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -2529,10 +2529,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE 
Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -2757,10 +2757,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -2954,11 +2954,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -3184,11 +3184,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -3414,11 +3414,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -3612,12 +3612,12 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 1] + keyColumns: 4:int, 1:string keyExpressions: ConstantVectorExpression(val 0) -> 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT 
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [0, 2] + partitionColumns: 5:int + valueColumns: 0:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_mfgr (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -3843,12 +3843,12 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 1] + keyColumns: 4:int, 1:string keyExpressions: ConstantVectorExpression(val 0) -> 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [0, 2] + partitionColumns: 5:int + valueColumns: 0:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_mfgr (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -4074,12 +4074,12 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 1] + keyColumns: 4:int, 1:string keyExpressions: ConstantVectorExpression(val 0) -> 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [0, 2] + partitionColumns: 5:int + valueColumns: 0:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_mfgr (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -4315,10 +4315,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:decimal(38,18) Statistics: Num rows: 40 Data size: 12944 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: decimal(38,18)) Execution mode: vectorized, llap @@ -4543,11 +4543,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:decimal(38,18) Statistics: Num rows: 40 Data size: 12944 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: decimal(38,18)) Execution 
mode: vectorized, llap @@ -4793,10 +4793,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:bigint Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_bigint (type: bigint) Execution mode: vectorized, llap @@ -5021,11 +5021,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:bigint Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_bigint (type: bigint) Execution mode: vectorized, llap @@ -5245,10 +5245,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:double Statistics: Num rows: 40 Data size: 4216 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -5444,11 +5444,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -5645,11 +5645,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (TIMESTAMP'2000-01-01 00:00:00.0') ELSE (CAST( null AS TIMESTAMP)) END (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 6] + keyColumns: 0:string, 6:timestamp keyExpressions: IfExprColumnNull(col 4:boolean, col 5:timestamp, null)(children: StringGroupColEqualStringScalar(col 0:string, val Manufacturer#2) -> 4:boolean, ConstantVectorExpression(val 2000-01-01 00:00:00.0) -> 5:timestamp) -> 6:timestamp native: true 
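The `keyExpressions` entry above records how the planner vectorizes `CASE WHEN (p_mfgr = 'Manufacturer#2') THEN TIMESTAMP'2000-01-01 00:00:00.0' ELSE CAST(null AS TIMESTAMP) END`: a `StringGroupColEqualStringScalar` fills a boolean scratch column (4), a `ConstantVectorExpression` fills a timestamp scratch column (5), and `IfExprColumnNull` writes either the constant or null into the output column (6). The row-at-a-time Java below is only a behavioral sketch of that expression tree; real Hive operators evaluate whole column vectors per batch, and the method names here are illustrative.

```java
import java.sql.Timestamp;

// Behavioral sketch of the vectorized CASE expression recorded in the plan:
//   IfExprColumnNull(StringGroupColEqualStringScalar(p_mfgr, 'Manufacturer#2'),
//                    ConstantVectorExpression(2000-01-01 00:00:00.0), null)
// Hive evaluates this over column vectors; this per-row version only
// shows the semantics.
public class CaseWhenSketch {

  static final Timestamp CONST_TS = Timestamp.valueOf("2000-01-01 00:00:00");

  // Per-row equivalent of the IfExprColumnNull expression tree.
  static Timestamp evalRow(String pMfgr) {
    boolean cond = "Manufacturer#2".equals(pMfgr); // StringGroupColEqualStringScalar -> col 4
    return cond ? CONST_TS : null;                 // IfExprColumnNull(col 4, col 5, null) -> col 6
  }

  public static void main(String[] args) {
    System.out.println(evalRow("Manufacturer#2")); // 2000-01-01 00:00:00.0
    System.out.println(evalRow("Manufacturer#1")); // null
  }
}
```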
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -5814,12 +5814,12 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (TIMESTAMP'2000-01-01 00:00:00.0') ELSE (CAST( null AS TIMESTAMP)) END (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 6, 1] + keyColumns: 0:string, 6:timestamp, 1:string keyExpressions: IfExprColumnNull(col 4:boolean, col 5:timestamp, null)(children: StringGroupColEqualStringScalar(col 0:string, val Manufacturer#2) -> 4:boolean, ConstantVectorExpression(val 2000-01-01 00:00:00.0) -> 5:timestamp) -> 6:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 9] - valueColumnNums: [2] + partitionColumns: 0:string, 9:timestamp + valueColumns: 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -6146,10 +6146,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:double Statistics: Num rows: 40 Data size: 4216 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -6345,11 +6345,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -6546,12 +6546,12 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (TIMESTAMP'2000-01-01 00:00:00.0') ELSE (CAST( null AS TIMESTAMP)) END (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 6, 1] + keyColumns: 0:string, 6:timestamp, 1:string keyExpressions: IfExprColumnNull(col 4:boolean, col 5:timestamp, 
null)(children: StringGroupColEqualStringScalar(col 0:string, val Manufacturer#2) -> 4:boolean, ConstantVectorExpression(val 2000-01-01 00:00:00.0) -> 5:timestamp) -> 6:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 9] - valueColumnNums: [2] + partitionColumns: 0:string, 9:timestamp + valueColumns: 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -6748,11 +6748,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (TIMESTAMP'2000-01-01 00:00:00.0') ELSE (CAST( null AS TIMESTAMP)) END (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 6] + keyColumns: 0:string, 6:timestamp keyExpressions: IfExprColumnNull(col 4:boolean, col 5:timestamp, null)(children: StringGroupColEqualStringScalar(col 0:string, val Manufacturer#2) -> 4:boolean, ConstantVectorExpression(val 2000-01-01 00:00:00.0) -> 5:timestamp) -> 6:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out index bd42ed2..5894fa0 100644 --- ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out +++ ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out @@ -87,13 +87,15 @@ STAGE PLANS: 0 one (type: int), two (type: int) 1 1 (type: int), 2 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0, 1] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:int className: VectorMapJoinInnerBigOnlyMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:int + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1 input vertices: 1 Map 1 diff --git ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out index 9eaf293..9e1493c 100644 --- ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out +++ ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out @@ -127,10 +127,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: 
VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] + valueColumns: 0:struct, 1:double, 2:struct, 3:struct, 4:struct, 5:struct, 6:struct, 7:tinyint, 8:bigint Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: tinyint), _col8 (type: bigint) Execution mode: vectorized, llap @@ -310,10 +309,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] + valueColumns: 0:struct, 1:double, 2:struct, 3:struct, 4:struct, 5:struct, 6:struct, 7:tinyint, 8:bigint Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: tinyint), _col8 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_udf1.q.out ql/src/test/results/clientpositive/llap/vector_udf1.q.out index 9859824..8a388b8 100644 --- ql/src/test/results/clientpositive/llap/vector_udf1.q.out +++ ql/src/test/results/clientpositive/llap/vector_udf1.q.out @@ -2800,10 +2800,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:string, 1:varchar(20) Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: varchar(20)) Execution mode: vectorized, llap @@ -2943,10 +2942,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:string, 1:varchar(20) Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: varchar(20)) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_windowing.q.out ql/src/test/results/clientpositive/llap/vector_windowing.q.out index a821265..b7abf78 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing.q.out +++ 
ql/src/test/results/clientpositive/llap/vector_windowing.q.out @@ -43,11 +43,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -246,11 +246,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [3] + partitionColumns: 0:string + valueColumns: 3:double Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: double) Execution mode: vectorized, llap @@ -454,11 +454,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [3] + partitionColumns: 0:string + valueColumns: 3:double Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: double) Execution mode: vectorized, llap @@ -633,11 +633,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -828,11 +828,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -1030,11 +1030,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -1238,11 +1238,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [0, 5, 7] + partitionColumns: 2:string + valueColumns: 0:int, 5:int, 7:double Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -1284,11 +1284,10 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [] + partitionColumns: 0:int Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1360,6 +1359,9 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 27 Data size: 6237 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: llap Reduce Vectorization: @@ -1525,11 +1527,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1, 5] + keyColumns: 2:string, 1:string, 5:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - 
valueColumnNums: [] + partitionColumns: 2:string Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1684,11 +1685,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -1861,11 +1862,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -2040,11 +2041,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -2230,12 +2231,12 @@ STAGE PLANS: Map-reduce partition columns: 'Manufacturer#3' (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [10, 1] + keyColumns: 10:string, 1:string keyExpressions: ConstantVectorExpression(val Manufacturer#3) -> 10:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [11] - valueColumnNums: [5] + partitionColumns: 11:string + valueColumns: 5:int Statistics: Num rows: 5 Data size: 1115 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -2398,11 +2399,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string 
native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -2565,11 +2566,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -2785,11 +2786,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -3077,11 +3078,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -3356,11 +3357,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -3458,11 +3459,11 @@ STAGE PLANS: Map-reduce partition 
columns: _col2 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [3, 4, 2] + partitionColumns: 0:string + valueColumns: 3:bigint, 4:bigint, 2:int Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE value expressions: count_window_0 (type: bigint), count_window_1 (type: bigint), _col5 (type: int) Reducer 3 @@ -3609,11 +3610,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -3823,11 +3824,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:string, 1:string, 2:int, 3:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1, 2, 3] - valueColumnNums: [4, 5] + partitionColumns: 0:string, 1:string, 2:int, 3:double + valueColumns: 4:double, 5:double Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col4 (type: double), _col5 (type: double) Execution mode: vectorized, llap @@ -3892,11 +3893,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2, 3, 4, 5] + partitionColumns: 1:string + valueColumns: 2:int, 3:double, 4:double, 5:double Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col3 (type: double), _col4 (type: double), _col5 (type: double) Reducer 3 @@ -4056,11 +4057,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -4259,11 +4260,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [0, 5, 7] + partitionColumns: 2:string + valueColumns: 0:int, 5:int, 7:double Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -4527,11 +4528,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: double) Execution mode: vectorized, llap @@ -4809,11 +4810,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [3, 7] + partitionColumns: 2:string + valueColumns: 3:string, 7:double Statistics: Num rows: 26 Data size: 8294 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_brand (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -5298,11 +5299,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE 
Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Reduce Output Operator @@ -5311,11 +5312,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Reduce Output Operator @@ -5324,11 +5325,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -5713,11 +5714,11 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [3, 4, 2] + partitionColumns: 0:string + valueColumns: 3:bigint, 4:bigint, 2:int Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE value expressions: count_window_0 (type: bigint), count_window_1 (type: bigint), _col5 (type: int) Reducer 9 @@ -6081,11 +6082,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [3] + partitionColumns: 0:string + valueColumns: 3:double Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: double) Execution mode: vectorized, llap @@ -6264,11 +6265,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 5] + keyColumns: 2:string, 5:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns 
IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [1] + partitionColumns: 2:string + valueColumns: 1:string Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string) Execution mode: vectorized, llap @@ -6429,11 +6430,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -6586,11 +6587,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -6749,11 +6750,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -6922,11 +6923,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -7089,11 +7090,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -7266,11 +7267,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -7447,11 +7448,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -7627,11 +7628,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -7825,11 +7826,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -8021,11 +8022,11 @@ STAGE PLANS: Map-reduce partition columns: 
p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [7] + partitionColumns: 2:string + valueColumns: 7:double Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -8245,11 +8246,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2, 1] - valueColumnNums: [5, 7] + partitionColumns: 2:string, 1:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -8485,12 +8486,12 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 10] + keyColumns: 2:string, 10:string keyExpressions: StringSubstrColStart(col 4:string, start 1) -> 10:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [4] + partitionColumns: 2:string + valueColumns: 4:string Statistics: Num rows: 26 Data size: 5252 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_type (type: string) Execution mode: vectorized, llap @@ -8675,11 +8676,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -8830,11 +8831,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 5] + keyColumns: 2:string, 5:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [1, 7] + partitionColumns: 2:string + valueColumns: 1:string, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -9018,11 +9019,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -9173,11 +9174,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 5] + keyColumns: 2:string, 5:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [1, 7] + partitionColumns: 2:string + valueColumns: 1:string, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -9331,12 +9332,12 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [10] + keyColumns: 10:int keyExpressions: ConstantVectorExpression(val 0) -> 10:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [11] - valueColumnNums: [1, 7] + partitionColumns: 11:int + valueColumns: 1:string, 7:double Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -9427,10 +9428,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2, 5] + valueColumns: 2:double, 5:double Statistics: Num rows: 26 Data size: 3562 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: double) Reducer 3 @@ -9565,12 +9566,11 @@ STAGE PLANS: Map-reduce partition columns: 'Manufacturer#6' (type: string) Reduce Sink Vectorization: className: 
VectorReduceSinkObjectHashOperator - keyColumnNums: [10, 5] + keyColumns: 10:string, 5:int keyExpressions: ConstantVectorExpression(val Manufacturer#6) -> 10:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [11] - valueColumnNums: [] + partitionColumns: 11:string Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -9705,12 +9705,12 @@ STAGE PLANS: Map-reduce partition columns: 'Manufacturer#1' (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [10, 1] + keyColumns: 10:string, 1:string keyExpressions: ConstantVectorExpression(val Manufacturer#1) -> 10:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [11] - valueColumnNums: [7] + partitionColumns: 11:string + valueColumns: 7:double Statistics: Num rows: 5 Data size: 1135 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -9854,12 +9854,12 @@ STAGE PLANS: Map-reduce partition columns: 'm1' (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [10] + keyColumns: 10:string keyExpressions: ConstantVectorExpression(val m1) -> 10:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [11] - valueColumnNums: [5] + partitionColumns: 11:string + valueColumns: 5:int Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out index 401a73e..663baa8 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out @@ -89,11 +89,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 7] + keyColumns: 2:string, 7:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -283,11 +283,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 7] 
+ keyColumns: 2:string, 7:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -445,11 +445,10 @@ STAGE PLANS: Map-reduce partition columns: t (type: tinyint) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 6, 7, 1, 4] + keyColumns: 0:tinyint, 6:boolean, 7:string, 1:smallint, 4:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [] + partitionColumns: 0:tinyint Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -669,11 +668,10 @@ STAGE PLANS: Map-reduce partition columns: si (type: smallint) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 2, 7] + keyColumns: 1:smallint, 2:int, 7:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [] + partitionColumns: 1:smallint Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -893,11 +891,10 @@ STAGE PLANS: Map-reduce partition columns: b (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3, 1, 7, 5] + keyColumns: 3:bigint, 1:smallint, 7:string, 5:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [3] - valueColumnNums: [] + partitionColumns: 3:bigint Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1117,11 +1114,11 @@ STAGE PLANS: Map-reduce partition columns: f (type: float) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 3] + keyColumns: 4:float, 3:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [4] - valueColumnNums: [7] + partitionColumns: 4:float + valueColumns: 7:string Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE value expressions: s (type: string) Execution mode: vectorized, llap @@ -1342,10 +1339,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_type (type: string) 
Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 4] + keyColumns: 2:string, 4:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [7] + valueColumns: 7:double Statistics: Num rows: 26 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -1520,11 +1517,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 4] + keyColumns: 2:string, 4:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [7] + partitionColumns: 2:string + valueColumns: 7:double Statistics: Num rows: 26 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -1687,11 +1684,11 @@ STAGE PLANS: Map-reduce partition columns: ts (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [8, 2] + keyColumns: 8:timestamp, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [8] - valueColumnNums: [7] + partitionColumns: 8:timestamp + valueColumns: 7:string Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE value expressions: s (type: string) Execution mode: vectorized, llap @@ -1931,11 +1928,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 7] + keyColumns: 2:string, 7:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_windowing_gby.q.out ql/src/test/results/clientpositive/llap/vector_windowing_gby.q.out index d87e96f..61b09e7 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_gby.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_gby.q.out @@ -58,10 +58,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2, 4] + valueColumns: 2:int, 4:boolean Statistics: Num rows: 18 Data size: 1581 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: boolean) Execution mode: vectorized, llap @@ -111,10 +111,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [6] + keyColumns: 6:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:int Statistics: Num rows: 9174 Data size: 671296 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: vectorized, llap @@ -157,6 +157,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: boolean) Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -201,12 +204,12 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3, 6] + keyColumns: 3:int, 6:double keyExpressions: ConstantVectorExpression(val 0) -> 3:int, DoubleColDivideDoubleColumn(col 4:double, col 5:double)(children: CastLongToDouble(col 1:bigint) -> 4:double, CastLongToDouble(col 2:bigint) -> 5:double) -> 6:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [7] - valueColumnNums: [1, 2] + partitionColumns: 7:int + valueColumns: 1:bigint, 2:bigint Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 4 diff --git ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out index 8dcb900..106322b 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out @@ -62,10 +62,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap @@ -130,12 +130,11 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:int, 1:bigint 
keyExpressions: ConstantVectorExpression(val 0) -> 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [3] - valueColumnNums: [] + partitionColumns: 3:int Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap @@ -305,10 +304,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:bigint Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: bigint) Execution mode: vectorized, llap @@ -365,11 +364,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 2] + keyColumns: 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [0] + partitionColumns: 1:string + valueColumns: 0:int Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Reducer 3 @@ -542,10 +541,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2, 3, 4, 5] + valueColumns: 2:double, 3:double, 4:int, 5:double Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: double), _col3 (type: double), _col4 (type: int), _col5 (type: double) Execution mode: vectorized, llap @@ -602,11 +601,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 2] + keyColumns: 0:string, 2:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [1, 3, 4, 5] + partitionColumns: 0:string + valueColumns: 1:string, 3:double, 4:int, 5:double Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col3 (type: double), _col4 (type: int), _col5 (type: 
double) Reducer 3 @@ -681,12 +680,12 @@ STAGE PLANS: Map-reduce partition columns: lower(_col1) (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [7, 3] + keyColumns: 7:string, 3:double keyExpressions: StringLower(col 2:string) -> 7:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [8] - valueColumnNums: [6, 2, 4, 5] + partitionColumns: 8:string + valueColumns: 6:int, 2:string, 4:int, 5:double Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE value expressions: rank_window_0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: double) Reducer 4 @@ -761,11 +760,11 @@ STAGE PLANS: Map-reduce partition columns: _col5 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 5] + keyColumns: 4:int, 5:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [4] - valueColumnNums: [6, 2] + partitionColumns: 4:int + valueColumns: 6:int, 2:int Statistics: Num rows: 10 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE value expressions: dense_rank_window_1 (type: int), _col0 (type: int) Reducer 5 @@ -901,10 +900,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2, 4] + valueColumns: 2:int, 4:boolean Statistics: Num rows: 18 Data size: 1581 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: boolean) Execution mode: vectorized, llap @@ -954,10 +953,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [6] + keyColumns: 6:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:int Statistics: Num rows: 9174 Data size: 671296 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: vectorized, llap @@ -1000,6 +999,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: boolean) Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1044,12 +1046,12 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: 
VectorReduceSinkObjectHashOperator
- keyColumnNums: [3, 6]
+ keyColumns: 3:int, 6:double
 keyExpressions: ConstantVectorExpression(val 0) -> 3:int, DoubleColDivideDoubleColumn(col 4:double, col 5:double)(children: CastLongToDouble(col 1:bigint) -> 4:double, CastLongToDouble(col 2:bigint) -> 5:double) -> 6:double
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [7]
- valueColumnNums: [1, 2]
+ partitionColumns: 7:int
+ valueColumns: 1:bigint, 2:bigint
 Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: bigint), _col2 (type: bigint)
 Reducer 4
diff --git ql/src/test/results/clientpositive/llap/vector_windowing_multipartitioning.q.out ql/src/test/results/clientpositive/llap/vector_windowing_multipartitioning.q.out
index 595b0a8..e52a76b 100644
--- ql/src/test/results/clientpositive/llap/vector_windowing_multipartitioning.q.out
+++ ql/src/test/results/clientpositive/llap/vector_windowing_multipartitioning.q.out
@@ -81,11 +81,11 @@ STAGE PLANS:
 Map-reduce partition columns: s (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [7, 1]
+ keyColumns: 7:string, 1:smallint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [7]
- valueColumnNums: [3]
+ partitionColumns: 7:string
+ valueColumns: 3:bigint
 Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
 value expressions: b (type: bigint)
 Execution mode: vectorized, llap
@@ -10256,11 +10256,11 @@ STAGE PLANS:
 Map-reduce partition columns: s (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [7, 9]
+ keyColumns: 7:string, 9:decimal(4,2)
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [7]
- valueColumnNums: [3, 8]
+ partitionColumns: 7:string
+ valueColumns: 3:bigint, 8:timestamp
 Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
 value expressions: b (type: bigint), ts (type: timestamp)
 Execution mode: vectorized, llap
@@ -10353,11 +10353,11 @@ STAGE PLANS:
 Map-reduce partition columns: _col7 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0, 3]
+ keyColumns: 0:string, 3:timestamp
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [0]
- valueColumnNums: [4, 2]
+ partitionColumns: 0:string
+ valueColumns: 4:int, 2:bigint
 Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
 value expressions: rank_window_0 (type: int), _col3 (type: bigint)
 Reducer 3
@@ -10534,10 +10534,10 @@ STAGE PLANS:
 Map-reduce partition columns: s (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkStringOperator
- keyColumnNums: [7]
+ keyColumns: 7:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [1, 2, 4]
+ valueColumns: 1:smallint, 2:int, 4:float
 Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
 value expressions: si (type: smallint), i (type: int), f (type: float)
 Execution mode: vectorized, llap
@@ -10628,10 +10628,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col1 (type: smallint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkLongOperator
- keyColumnNums: [1]
+ keyColumns: 1:smallint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [4, 3, 0]
+ valueColumns: 4:bigint, 3:float, 0:string
 Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
 value expressions: sum_window_0 (type: bigint), _col4 (type: float), _col7 (type: string)
 Reducer 3
@@ -10801,11 +10801,11 @@ STAGE PLANS:
 Map-reduce partition columns: s (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [7, 6]
+ keyColumns: 7:string, 6:boolean
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [7]
- valueColumnNums: [1, 10]
+ partitionColumns: 7:string
+ valueColumns: 1:smallint, 10:binary
 Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE
 value expressions: si (type: smallint), bin (type: binary)
 Execution mode: vectorized, llap
@@ -10898,11 +10898,11 @@ STAGE PLANS:
 Map-reduce partition columns: _col1 (type: smallint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [2, 3]
+ keyColumns: 2:smallint, 3:binary
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [2]
- valueColumnNums: [4, 0]
+ partitionColumns: 2:smallint
+ valueColumns: 4:int, 0:string
 Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE
 value expressions: rank_window_0 (type: int), _col7 (type: string)
 Reducer 3
@@ -11074,10 +11074,10 @@ STAGE PLANS:
 Map-reduce partition columns: i (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkLongOperator
- keyColumnNums: [2]
+ keyColumns: 2:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [4, 7]
+ valueColumns: 4:float, 7:string
 Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
 value expressions: f (type: float), s (type: string)
 Execution mode: vectorized, llap
@@ -11168,12 +11168,12 @@ STAGE PLANS:
 Map-reduce partition columns: 0 (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [4, 1]
+ keyColumns: 4:int, 1:float
 keyExpressions: ConstantVectorExpression(val 0) -> 4:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [5]
- valueColumnNums: [3, 2]
+ partitionColumns: 5:int
+ valueColumns: 3:double, 2:string
 Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
 value expressions: sum_window_0 (type: double), _col7 (type: string)
 Reducer 3
@@ -11354,11 +11354,11 @@ STAGE PLANS:
 Map-reduce partition columns: s (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [7, 9]
+ keyColumns: 7:string, 9:decimal(4,2)
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [7]
- valueColumnNums: [1, 4]
+ partitionColumns: 7:string
+ valueColumns: 1:smallint, 4:float
 Statistics: Num rows: 1 Data size: 304 Basic stats: COMPLETE Column stats: NONE
 value expressions: si (type: smallint), f (type: float)
 Execution mode: vectorized, llap
@@ -11451,11 +11451,11 @@ STAGE PLANS:
 Map-reduce partition columns: _col1 (type: smallint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [2, 3]
+ keyColumns: 2:smallint, 3:float
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [2]
- valueColumnNums: [4, 0]
+ partitionColumns: 2:smallint
+ valueColumns: 4:int, 0:string
 Statistics: Num rows: 1 Data size: 304 Basic stats: COMPLETE Column stats: NONE
 value expressions: rank_window_0 (type: int), _col7 (type: string)
 Reducer 3
diff --git ql/src/test/results/clientpositive/llap/vector_windowing_navfn.q.out ql/src/test/results/clientpositive/llap/vector_windowing_navfn.q.out
index 9358281..ba58992 100644
--- ql/src/test/results/clientpositive/llap/vector_windowing_navfn.q.out
+++ ql/src/test/results/clientpositive/llap/vector_windowing_navfn.q.out
@@ -88,11 +88,10 @@ STAGE PLANS:
 Map-reduce partition columns: 0 (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkLongOperator
- keyColumnNums: [3]
+ keyColumns: 3:int
 keyExpressions: ConstantVectorExpression(val 0) -> 3:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: []
 Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -233,11 +232,11 @@ STAGE PLANS:
 Map-reduce partition columns: d (type: double)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [5, 9]
+ keyColumns: 5:double, 9:decimal(4,2)
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [5]
- valueColumnNums: [7]
+ partitionColumns: 5:double
+ valueColumns: 7:string
 Statistics: Num rows: 1 Data size: 304 Basic stats: COMPLETE Column stats: NONE
 value expressions: s (type: string)
 Execution mode: vectorized, llap
@@ -492,11 +491,11 @@ STAGE PLANS:
 Map-reduce partition columns: bin (type: binary)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [10, 5, 2]
+ keyColumns: 10:binary, 5:double, 2:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [10]
- valueColumnNums: [7]
+ partitionColumns: 10:binary
+ valueColumns: 7:string
 Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE
 value expressions: s (type: string)
 Execution mode: vectorized, llap
@@ -717,11 +716,10 @@ STAGE PLANS:
 Map-reduce partition columns: i (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [2, 7, 9]
+ keyColumns: 2:int, 7:string, 9:decimal(4,2)
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [2]
- valueColumnNums: []
+ partitionColumns: 2:int
 Statistics: Num rows: 1 Data size: 300 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -941,11 +939,11 @@ STAGE PLANS:
 Map-reduce partition columns: d (type: double)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [5, 4]
+ keyColumns: 5:double, 4:float
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [5]
- valueColumnNums: [0, 7]
+ partitionColumns: 5:double
+ valueColumns: 0:tinyint, 7:string
 Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
 value expressions: t (type: tinyint), s (type: string)
 Execution mode: vectorized, llap
@@ -1200,11 +1198,10 @@ STAGE PLANS:
 Map-reduce partition columns: bo (type: boolean)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [6, 7]
+ keyColumns: 6:boolean, 7:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [6]
- valueColumnNums: []
+ partitionColumns: 6:boolean
 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -1432,12 +1429,12 @@ STAGE PLANS:
 Map-reduce partition columns: UDFToByte(10) (type: tinyint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [12, 7]
+ keyColumns: 12:tinyint, 7:string
 keyExpressions: ConstantVectorExpression(val 10) -> 12:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [13]
- valueColumnNums: [2]
+ partitionColumns: 13:tinyint
+ valueColumns: 2:int
 Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
 value expressions: i (type: int)
 Execution mode: vectorized, llap
@@ -1627,11 +1624,10 @@ STAGE PLANS:
 Map-reduce partition columns: a (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0, 1]
+ keyColumns: 0:int, 1:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [0]
- valueColumnNums: []
+ partitionColumns: 0:int
 Statistics: Num rows: 15 Data size: 120 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -1800,11 +1796,10 @@ STAGE PLANS:
 Map-reduce partition columns: a (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0, 1]
+ keyColumns: 0:int, 1:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [0]
- valueColumnNums: []
+ partitionColumns: 0:int
 Statistics: Num rows: 15 Data size: 120 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -1973,11 +1968,10 @@ STAGE PLANS:
 Map-reduce partition columns: a (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0, 1]
+ keyColumns: 0:int, 1:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [0]
- valueColumnNums: []
+ partitionColumns: 0:int
 Statistics: Num rows: 15 Data size: 120 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -2146,11 +2140,10 @@ STAGE PLANS:
 Map-reduce partition columns: a (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0, 1]
+ keyColumns: 0:int, 1:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [0]
- valueColumnNums: []
+ partitionColumns: 0:int
 Statistics: Num rows: 15 Data size: 120 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
diff --git ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out
index 458a55d..648f97d 100644
--- ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out
+++ ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out
@@ -89,11 +89,10 @@ STAGE PLANS:
 Map-reduce partition columns: i (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [2, 7, 3]
+ keyColumns: 2:int, 7:string, 3:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [2]
- valueColumnNums: []
+ partitionColumns: 2:int
 Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -222,11 +221,10 @@ STAGE PLANS:
 Map-reduce partition columns: d (type: double)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [5, 7, 4]
+ keyColumns: 5:double, 7:string, 4:float
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [5]
- valueColumnNums: []
+ partitionColumns: 5:double
 Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -355,11 +353,11 @@ STAGE PLANS:
 Map-reduce partition columns: ts (type: timestamp)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [8, 4]
+ keyColumns: 8:timestamp, 4:float
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [8]
- valueColumnNums: [7]
+ partitionColumns: 8:timestamp
+ valueColumns: 7:string
 Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
 value expressions: s (type: string)
 Execution mode: vectorized, llap
@@ -489,11 +487,10 @@ STAGE PLANS:
 Map-reduce partition columns: t (type: tinyint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0, 7, 5]
+ keyColumns: 0:tinyint, 7:string, 5:double
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [0]
- valueColumnNums: []
+ partitionColumns: 0:tinyint
 Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -622,11 +619,11 @@ STAGE PLANS:
 Map-reduce partition columns: ts (type: timestamp)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [8, 7]
+ keyColumns: 8:timestamp, 7:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [8]
- valueColumnNums: [2]
+ partitionColumns: 8:timestamp
+ valueColumns: 2:int
 Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
 value expressions: i (type: int)
 Execution mode: vectorized, llap
@@ -792,11 +789,11 @@ STAGE PLANS:
 Map-reduce partition columns: s (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [7, 2]
+ keyColumns: 7:string, 2:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [7]
- valueColumnNums: [5]
+ partitionColumns: 7:string
+ valueColumns: 5:double
 Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
 value expressions: d (type: double)
 Execution mode: vectorized, llap
@@ -957,11 +954,11 @@ STAGE PLANS:
 Map-reduce partition columns: s (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [7, 2]
+ keyColumns: 7:string, 2:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [7]
- valueColumnNums: [5]
+ partitionColumns: 7:string
+ valueColumns: 5:double
 Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
 value expressions: d (type: double)
 Execution mode: vectorized, llap
@@ -1122,11 +1119,11 @@ STAGE PLANS:
 Map-reduce partition columns: s (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [7, 2]
+ keyColumns: 7:string, 2:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [7]
- valueColumnNums: [5]
+ partitionColumns: 7:string
+ valueColumns: 5:double
 Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
 value expressions: d (type: double)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vector_windowing_range_multiorder.q.out ql/src/test/results/clientpositive/llap/vector_windowing_range_multiorder.q.out
index 256b80d..5ef540a 100644
--- ql/src/test/results/clientpositive/llap/vector_windowing_range_multiorder.q.out
+++ ql/src/test/results/clientpositive/llap/vector_windowing_range_multiorder.q.out
@@ -81,11 +81,11 @@ STAGE PLANS:
 Map-reduce partition columns: si (type: smallint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [1, 2, 3]
+ keyColumns: 1:smallint, 2:int, 3:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [1]
- valueColumnNums: [0]
+ partitionColumns: 1:smallint
+ valueColumns: 0:tinyint
 Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
 value expressions: t (type: tinyint)
 Execution mode: vectorized, llap
@@ -340,11 +340,10 @@ STAGE PLANS:
 Map-reduce partition columns: si (type: smallint), bo (type: boolean)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [1, 6, 2, 4]
+ keyColumns: 1:smallint, 6:boolean, 2:int, 4:float
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [1, 6]
- valueColumnNums: []
+ partitionColumns: 1:smallint, 6:boolean
 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -563,11 +562,10 @@ STAGE PLANS:
 Map-reduce partition columns: si (type: smallint), bo (type: boolean)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [1, 6, 2, 4]
+ keyColumns: 1:smallint, 6:boolean, 2:int, 4:float
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [1, 6]
- valueColumnNums: []
+ partitionColumns: 1:smallint, 6:boolean
 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -786,10 +784,10 @@ STAGE PLANS:
 Map-reduce partition columns: s (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkStringOperator
- keyColumnNums: [7]
+ keyColumns: 7:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [1, 2]
+ valueColumns: 1:smallint, 2:int
 Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
 value expressions: si (type: smallint), i (type: int)
 Execution mode: vectorized, llap
@@ -10936,11 +10934,10 @@ STAGE PLANS:
 Map-reduce partition columns: s (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [7, 1, 2]
+ keyColumns: 7:string, 1:smallint, 2:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [7]
- valueColumnNums: []
+ partitionColumns: 7:string
 Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -11194,11 +11191,10 @@ STAGE PLANS:
 Map-reduce partition columns: s (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [7, 1, 2]
+ keyColumns: 7:string, 1:smallint, 2:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [7]
- valueColumnNums: []
+ partitionColumns: 7:string
 Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -11452,11 +11448,10 @@ STAGE PLANS:
 Map-reduce partition columns: s (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [7, 1, 2]
+ keyColumns: 7:string, 1:smallint, 2:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [7]
- valueColumnNums: []
+ partitionColumns: 7:string
 Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -11710,11 +11705,10 @@ STAGE PLANS:
 Map-reduce partition columns: si (type: smallint), bo (type: boolean)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [1, 6, 2, 4]
+ keyColumns: 1:smallint, 6:boolean, 2:int, 4:float
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [1, 6]
- valueColumnNums: []
+ partitionColumns: 1:smallint, 6:boolean
 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -11968,11 +11962,10 @@ STAGE PLANS:
 Map-reduce partition columns: i (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [2, 6, 3]
+ keyColumns: 2:int, 6:boolean, 3:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [2]
- valueColumnNums: []
+ partitionColumns: 2:int
 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -12192,12 +12185,12 @@ STAGE PLANS:
 Map-reduce partition columns: i (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [2, 12]
+ keyColumns: 2:int, 12:char(12)
 keyExpressions: CastStringGroupToChar(col 7:string, maxLength 12) -> 12:char(12)
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [2]
- valueColumnNums: [7]
+ partitionColumns: 2:int
+ valueColumns: 7:string
 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
 value expressions: s (type: string)
 Execution mode: vectorized, llap
@@ -12418,12 +12411,12 @@ STAGE PLANS:
 Map-reduce partition columns: i (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [2, 12]
+ keyColumns: 2:int, 12:varchar(12)
 keyExpressions: CastStringGroupToVarChar(col 7:string, maxLength 12) -> 12:varchar(12)
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [2]
- valueColumnNums: [7]
+ partitionColumns: 2:int
+ valueColumns: 7:string
 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
 value expressions: s (type: string)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out
index 406bd93..97b6b62 100644
--- ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out
+++ ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out
@@ -81,11 +81,11 @@ STAGE PLANS:
 Map-reduce partition columns: f (type: float)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [4, 0]
+ keyColumns: 4:float, 0:tinyint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [4]
- valueColumnNums: [7]
+ partitionColumns: 4:float
+ valueColumns: 7:string
 Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
 value expressions: s (type: string)
 Execution mode: vectorized, llap
@@ -341,11 +341,10 @@ STAGE PLANS:
 Map-reduce partition columns: ts (type: timestamp)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [8, 2, 7]
+ keyColumns: 8:timestamp, 2:int, 7:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [8]
- valueColumnNums: []
+ partitionColumns: 8:timestamp
 Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -565,11 +564,10 @@ STAGE PLANS:
 Map-reduce partition columns: bo (type: boolean)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [6, 3, 7]
+ keyColumns: 6:boolean, 3:bigint, 7:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [6]
- valueColumnNums: []
+ partitionColumns: 6:boolean
 Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -789,11 +787,11 @@ STAGE PLANS:
 Map-reduce partition columns: dec (type: decimal(4,2))
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [9, 4]
+ keyColumns: 9:decimal(4,2), 4:float
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [9]
- valueColumnNums: [7]
+ partitionColumns: 9:decimal(4,2)
+ valueColumns: 7:string
 Statistics: Num rows: 1 Data size: 300 Basic stats: COMPLETE Column stats: NONE
 value expressions: s (type: string)
 Execution mode: vectorized, llap
@@ -1050,10 +1048,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkLongOperator
- keyColumnNums: [3]
+ keyColumns: 3:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [8, 9]
+ valueColumns: 8:timestamp, 9:decimal(4,2)
 Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col1 (type: timestamp), _col2 (type: decimal(4,2))
 Execution mode: vectorized, llap
@@ -1103,10 +1101,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkLongOperator
- keyColumnNums: [3]
+ keyColumns: 3:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: []
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -1142,6 +1139,9 @@ STAGE PLANS:
 sort order: ++
 Map-reduce partition columns: _col1 (type: timestamp)
 Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -1351,10 +1351,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkLongOperator
- keyColumnNums: [3]
+ keyColumns: 3:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [8, 9]
+ valueColumns: 8:timestamp, 9:decimal(4,2)
 Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col1 (type: timestamp), _col2 (type: decimal(4,2))
 Execution mode: vectorized, llap
@@ -1404,10 +1404,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkLongOperator
- keyColumnNums: [3]
+ keyColumns: 3:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: []
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -1444,6 +1443,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col1 (type: timestamp)
 Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col2 (type: decimal(4,2))
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -1654,10 +1656,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col1 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkLongOperator
- keyColumnNums: [3]
+ keyColumns: 3:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [8, 9]
+ valueColumns: 8:timestamp, 9:decimal(4,2)
 Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col2 (type: timestamp), _col3 (type: decimal(4,2))
 Execution mode: vectorized, llap
@@ -1707,10 +1709,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkLongOperator
- keyColumnNums: [3]
+ keyColumns: 3:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: []
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -1746,6 +1747,9 @@ STAGE PLANS:
 sort order: ++
 Map-reduce partition columns: _col2 (type: timestamp)
 Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
diff --git ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out
index ae07a78..45fd7cc 100644
--- ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out
+++ ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out
@@ -83,11 +83,10 @@ STAGE PLANS:
 Map-reduce partition columns: p_mfgr (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [2, 1]
+ keyColumns: 2:string, 1:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [2]
- valueColumnNums: []
+ partitionColumns: 2:string
 Statistics: Num rows: 26 Data size: 5694 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
diff --git ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out
index bff683f..bfa7485 100644
--- ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out
+++ ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out
@@ -81,11 +81,10 @@ STAGE PLANS:
 Map-reduce partition columns: i (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [2, 7, 3]
+ keyColumns: 2:int, 7:string, 3:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [2]
- valueColumnNums: []
+ partitionColumns: 2:int
 Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -304,11 +303,10 @@ STAGE PLANS:
 Map-reduce partition columns: d (type: double)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [5, 7, 4]
+ keyColumns: 5:double, 7:string, 4:float
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [5]
- valueColumnNums: []
+ partitionColumns: 5:double
 Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -527,11 +525,11 @@ STAGE PLANS:
 Map-reduce partition columns: ts (type: timestamp)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [8, 4]
+ keyColumns: 8:timestamp, 4:float
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [8]
- valueColumnNums: [7]
+ partitionColumns: 8:timestamp
+ valueColumns: 7:string
 Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
 value expressions: s (type: string)
 Execution mode: vectorized, llap
@@ -751,11 +749,10 @@ STAGE PLANS:
 Map-reduce partition columns: ts (type: timestamp)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [8, 7, 4]
+ keyColumns: 8:timestamp, 7:string, 4:float
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [8]
- valueColumnNums: []
+ partitionColumns: 8:timestamp
 Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -974,11 +971,10 @@ STAGE PLANS:
 Map-reduce partition columns: t (type: tinyint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0, 7, 5]
+ keyColumns: 0:tinyint, 7:string, 5:double
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [0]
- valueColumnNums: []
+ partitionColumns: 0:tinyint
 Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -1197,11 +1193,11 @@ STAGE PLANS:
 Map-reduce partition columns: ts (type: timestamp)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [8, 7]
+ keyColumns: 8:timestamp, 7:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [8]
- valueColumnNums: [2]
+ partitionColumns: 8:timestamp
+ valueColumns: 2:int
 Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
 value expressions: i (type: int)
 Execution mode: vectorized, llap
@@ -1456,11 +1452,10 @@ STAGE PLANS:
 Map-reduce partition columns: ts (type: timestamp)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [8, 4]
+ keyColumns: 8:timestamp, 4:float
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [8]
- valueColumnNums: []
+ partitionColumns: 8:timestamp
 Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -1714,11 +1709,10 @@ STAGE PLANS:
 Map-reduce partition columns: ts (type: timestamp)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [8, 4]
+ keyColumns: 8:timestamp, 4:float
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [8]
- valueColumnNums: []
+ partitionColumns: 8:timestamp
 Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -1937,11 +1931,11 @@ STAGE PLANS:
 Map-reduce partition columns: s (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [7, 2]
+ keyColumns: 7:string, 2:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [7]
- valueColumnNums: [5]
+ partitionColumns: 7:string
+ valueColumns: 5:double
 Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
 value expressions: d (type: double)
 Execution mode: vectorized, llap
@@ -2104,11 +2098,11 @@ STAGE PLANS:
 Map-reduce partition columns: s (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [7, 2]
+ keyColumns: 7:string, 2:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [7]
- valueColumnNums: [5]
+ partitionColumns: 7:string
+ valueColumns: 5:double
 Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
 value expressions: d (type: double)
 Execution mode: vectorized, llap
@@ -2271,11 +2265,11 @@ STAGE PLANS:
 Map-reduce partition columns: s (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [7, 2]
+ keyColumns: 7:string, 2:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [7]
- valueColumnNums: [5]
+ partitionColumns: 7:string
+ valueColumns: 5:double
 Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
 value expressions: d (type: double)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vector_windowing_windowspec4.q.out ql/src/test/results/clientpositive/llap/vector_windowing_windowspec4.q.out
index fe1e538..13087c4 100644
--- ql/src/test/results/clientpositive/llap/vector_windowing_windowspec4.q.out
+++ ql/src/test/results/clientpositive/llap/vector_windowing_windowspec4.q.out
@@ -80,11 +80,10 @@ STAGE PLANS:
 Map-reduce partition columns: type (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [1, 0]
+ keyColumns: 1:string, 0:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [1]
- valueColumnNums: []
+ partitionColumns: 1:string
 Statistics: Num rows: 3 Data size: 267 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
diff --git ql/src/test/results/clientpositive/llap/vectorization_0.q.out ql/src/test/results/clientpositive/llap/vectorization_0.q.out
index 1a846ab..e4eb5b5 100644
--- ql/src/test/results/clientpositive/llap/vectorization_0.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_0.q.out
@@ -63,10 +63,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0, 1, 2, 3]
+ valueColumns: 0:tinyint, 1:tinyint, 2:bigint, 3:bigint
 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint)
 Execution mode: vectorized, llap
@@ -119,10 +118,10 @@ STAGE PLANS:
 sort order: +
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0]
+ keyColumns: 0:tinyint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [1, 2, 3]
+ valueColumns: 1:tinyint, 2:bigint, 3:bigint
 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint)
 Reducer 3
@@ -244,10 +243,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0]
+ valueColumns: 0:bigint
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint)
 Execution mode: vectorized, llap
@@ -300,10 +298,9 @@ STAGE PLANS:
 sort order: +
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0]
+ keyColumns: 0:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: []
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 Reducer 3
 Execution mode: vectorized, llap
@@ -570,10 +567,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0, 1, 2, 3]
+ valueColumns: 0:bigint, 1:bigint, 2:bigint, 3:bigint
 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
 Execution mode: vectorized, llap
@@ -626,10 +622,10 @@ STAGE PLANS:
 sort order: +
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0]
+ keyColumns: 0:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [1, 2, 3]
+ valueColumns: 1:bigint, 2:bigint, 3:bigint
 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
 Reducer 3
@@ -751,10 +747,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0]
+ valueColumns: 0:bigint
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint)
 Execution mode: vectorized, llap
@@ -807,10 +802,9 @@ STAGE PLANS:
 sort order: +
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0]
+ keyColumns: 0:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: []
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 Reducer 3
 Execution mode: vectorized, llap
@@ -1077,10 +1071,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0, 1, 2, 3]
+ valueColumns: 0:float, 1:float, 2:bigint, 3:bigint
 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint)
 Execution mode: vectorized, llap
@@ -1133,10 +1126,10 @@ STAGE PLANS:
 sort order: +
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0]
+ keyColumns: 0:float
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [1, 2, 3]
+ valueColumns: 1:float, 2:bigint, 3:bigint
 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint)
 Reducer 3
@@ -1258,10 +1251,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0]
+ valueColumns: 0:double
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: double)
 Execution mode: vectorized, llap
@@ -1314,10 +1306,9 @@ STAGE PLANS:
 sort order: +
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0]
+ keyColumns: 0:double
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: []
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 Reducer 3
 Execution mode: vectorized, llap
@@ -1629,10 +1620,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0, 1, 2, 3, 4, 5]
+ valueColumns: 0:struct, 1:struct, 2:struct, 3:bigint, 4:double, 5:tinyint
 Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: bigint), _col4 (type: double), _col5 (type: tinyint)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_1.q.out ql/src/test/results/clientpositive/llap/vectorization_1.q.out
index dbee077..e71cfa9 100644
--- ql/src/test/results/clientpositive/llap/vectorization_1.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_1.q.out
@@ -95,10 +95,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0, 1, 2, 3, 4, 5]
+ valueColumns: 0:struct, 1:double, 2:tinyint, 3:int, 4:struct, 5:bigint
 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: tinyint), _col3 (type: int), _col4 (type: struct), _col5 (type: bigint)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_12.q.out ql/src/test/results/clientpositive/llap/vectorization_12.q.out
index c9faf55..5ea5fac 100644
--- ql/src/test/results/clientpositive/llap/vectorization_12.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_12.q.out
@@ -122,10 +122,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
- keyColumnNums: [0, 1, 2, 3]
+ keyColumns: 0:double, 1:bigint, 2:string, 3:boolean
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [4, 5, 6, 7, 8]
+ valueColumns: 4:bigint, 5:struct, 6:struct, 7:bigint, 8:struct
 Statistics: Num rows: 1 Data size: 370 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col4 (type: bigint), _col5 (type: struct), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_13.q.out ql/src/test/results/clientpositive/llap/vectorization_13.q.out
index 82982e6..74e5671 100644
--- ql/src/test/results/clientpositive/llap/vectorization_13.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_13.q.out
@@ -124,10 +124,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
- keyColumnNums: [0, 1, 2, 3, 4]
+ keyColumns: 0:boolean, 1:tinyint, 2:timestamp, 3:float, 4:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [5, 6, 7, 8, 9, 10]
+ valueColumns: 5:tinyint, 6:double, 7:struct, 8:struct, 9:float, 10:tinyint
 Statistics: Num rows: 2730 Data size: 816734 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_14.q.out ql/src/test/results/clientpositive/llap/vectorization_14.q.out
index eaf5157..8a59d24 100644
--- ql/src/test/results/clientpositive/llap/vectorization_14.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_14.q.out
@@ -125,10 +125,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
- keyColumnNums: [0, 1, 2, 3, 4]
+ keyColumns: 0:string, 1:float, 2:double, 3:timestamp, 4:boolean
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [5, 6, 7, 8, 9, 10]
+ valueColumns: 5:struct, 6:float, 7:struct, 8:bigint, 9:struct, 10:struct
 Statistics: Num rows: 303 Data size: 137686 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_15.q.out ql/src/test/results/clientpositive/llap/vectorization_15.q.out
index 0887212..1e9d800 100644
--- ql/src/test/results/clientpositive/llap/vectorization_15.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_15.q.out
@@ -120,10 +120,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
- keyColumnNums: [0, 1, 2, 3, 4, 5, 6]
+ keyColumns: 0:float, 1:boolean, 2:double, 3:string, 4:tinyint, 5:int, 6:timestamp
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [7, 8, 9, 10, 11, 12]
+ valueColumns: 7:struct, 8:double, 9:struct, 10:struct, 11:struct, 12:struct
 Statistics: Num rows: 6144 Data size: 3293884 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col7 (type: struct), _col8 (type: double), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_16.q.out ql/src/test/results/clientpositive/llap/vectorization_16.q.out
index dd2e5f0..43d3a82 100644
--- ql/src/test/results/clientpositive/llap/vectorization_16.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_16.q.out
@@ -97,10 +97,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
- keyColumnNums: [0, 1, 2]
+ keyColumns: 0:double, 1:string, 2:timestamp
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [3, 4, 5]
+ valueColumns: 3:bigint, 4:struct, 5:double
 Statistics: Num rows: 2048 Data size: 434588 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_17.q.out ql/src/test/results/clientpositive/llap/vectorization_17.q.out
index d0b2f7a..ab50738 100644
--- ql/src/test/results/clientpositive/llap/vectorization_17.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_17.q.out
@@ -91,10 +91,10 @@ STAGE PLANS:
 sort order: ++
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [3, 4]
+ keyColumns: 3:bigint, 4:float
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [6, 2, 8, 5, 15, 16, 14, 17, 19, 20, 22, 18]
+ valueColumns: 6:string, 2:int, 8:timestamp, 5:double, 15:double, 16:bigint, 14:double, 17:double, 19:double, 20:double, 22:decimal(11,4), 18:double
 Statistics: Num rows: 4096 Data size: 1212930 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_2.q.out ql/src/test/results/clientpositive/llap/vectorization_2.q.out
index 96badf9..62cdc6e 100644
--- ql/src/test/results/clientpositive/llap/vectorization_2.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_2.q.out
@@ -99,10 +99,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0, 1, 2, 3, 4, 5]
+ valueColumns: 0:struct, 1:double, 2:struct, 3:bigint, 4:tinyint, 5:struct
 Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: bigint), _col4 (type: tinyint), _col5 (type: struct)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_3.q.out ql/src/test/results/clientpositive/llap/vectorization_3.q.out
index b472c2d..405bed7 100644
--- ql/src/test/results/clientpositive/llap/vectorization_3.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_3.q.out
@@ -104,10 +104,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0, 1, 2, 3, 4, 5]
+ valueColumns: 0:struct, 1:struct, 2:struct, 3:double, 4:struct, 5:struct
 Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: struct)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_4.q.out ql/src/test/results/clientpositive/llap/vectorization_4.q.out
index 122f3fb..efbbf61 100644
--- ql/src/test/results/clientpositive/llap/vectorization_4.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_4.q.out
@@ -99,10 +99,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0, 1, 2, 3, 4]
+ valueColumns: 0:bigint, 1:struct, 2:struct, 3:struct, 4:tinyint
 Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: tinyint)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_5.q.out ql/src/test/results/clientpositive/llap/vectorization_5.q.out
index 5124740..be9c39f 100644
--- ql/src/test/results/clientpositive/llap/vectorization_5.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_5.q.out
@@ -93,10 +93,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0, 1, 2, 3, 4]
+ valueColumns: 0:smallint, 1:bigint, 2:smallint, 3:bigint, 4:tinyint
 Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: tinyint)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_7.q.out ql/src/test/results/clientpositive/llap/vectorization_7.q.out
index 907411b..3554928 100644
--- ql/src/test/results/clientpositive/llap/vectorization_7.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_7.q.out
@@ -97,10 +97,9 @@ STAGE PLANS:
 sort order: +++++++++++++++
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [10, 3, 1, 0, 8, 6, 14, 15, 16, 17, 19, 20, 18, 21, 23]
+ keyColumns: 10:boolean, 3:bigint, 1:smallint, 0:tinyint, 8:timestamp, 6:string, 14:bigint, 15:int, 16:smallint, 17:tinyint, 19:int, 20:bigint, 18:int, 21:tinyint, 23:tinyint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: []
 Statistics: Num rows: 5461 Data size: 923616 Basic stats: COMPLETE Column stats: COMPLETE
 TopN Hash Memory Usage: 0.1
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_8.q.out ql/src/test/results/clientpositive/llap/vectorization_8.q.out
index 64480d7..f40c2ec 100644
--- ql/src/test/results/clientpositive/llap/vectorization_8.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_8.q.out
@@ -93,10 +93,9 @@ STAGE PLANS:
 sort order: ++++++++++++++
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [8, 5, 10, 6, 4, 13, 14, 15, 17, 19, 16, 18, 20, 22]
+ keyColumns: 8:timestamp, 5:double, 10:boolean, 6:string, 4:float, 13:double, 14:double, 15:double, 17:float, 19:double, 16:double, 18:float, 20:float, 22:double
 native: true
 nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vectorization_9.q.out ql/src/test/results/clientpositive/llap/vectorization_9.q.out index dd2e5f0..43d3a82 100644 --- ql/src/test/results/clientpositive/llap/vectorization_9.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_9.q.out @@ -97,10 +97,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:double, 1:string, 2:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5] + valueColumns: 3:bigint, 4:struct, 5:double Statistics: Num rows: 2048 Data size: 434588 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out index e6427fa..0b12efa 100644 --- ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out @@ -53,10 +53,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out index 61c5051..164edc4 100644 --- ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out @@ -208,6 +208,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -343,6 +346,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -479,6 +485,9 @@ 
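Two plan-output changes run through the hunks above and below. First, the ReduceSink detail fields are renamed, with keyColumnNums/valueColumnNums becoming keyColumns/valueColumns and now carrying the column type next to each index (e.g. "keyColumns: 0:double, 1:string, 2:timestamp"). Second, merge joins that cannot be vectorized are now called out explicitly with a "MergeJoin Vectorization: enabled: false" block instead of being silently skipped. A rough way to see both, as a sketch only: alltypesorc and its columns are the conventional q-test source table (an assumption here, not part of this diff), while the SET flags are taken verbatim from the nativeConditionsMet strings in the output above.

    -- Sketch: detailed ReduceSink annotation with typed key/value columns
    -- (assumed table alltypesorc; flags as listed in nativeConditionsMet).
    SET hive.vectorized.execution.enabled=true;
    SET hive.vectorized.execution.reducesink.new.enabled=true;
    SET hive.execution.engine=tez;
    EXPLAIN VECTORIZATION DETAIL
    SELECT cdouble, cstring1, ctimestamp1, count(*)
    FROM alltypesorc
    GROUP BY cdouble, cstring1, ctimestamp1;

    -- Sketch: with map-join conversion disabled (hive.auto.convert.join=false is
    -- an assumed way to keep a reduce-side merge join in the plan), the plan
    -- should now report "MergeJoin Vectorization: enabled: false /
    -- enableConditionsNotMet: Vectorizing MergeJoin Supported IS false".
    SET hive.auto.convert.join=false;
    EXPLAIN VECTORIZATION DETAIL
    SELECT count(*)
    FROM test1 JOIN test2 ON (test1.value = test2.value);  -- tables created later in this diff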
STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vectorized_case.q.out ql/src/test/results/clientpositive/llap/vectorized_case.q.out index aec161d..484be90 100644 --- ql/src/test/results/clientpositive/llap/vectorized_case.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_case.q.out @@ -315,10 +315,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap @@ -459,10 +458,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index 15b62c9..d293e05 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -330,6 +330,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -473,6 +476,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -688,6 +694,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 2200 Data size: 809600 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -707,6 +716,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + 
enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -884,6 +896,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 2200 Data size: 809600 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -903,6 +918,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -1090,6 +1108,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1233,6 +1254,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1402,6 +1426,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1545,6 +1572,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1712,6 +1742,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1870,6 +1903,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2013,6 +2049,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2156,6 +2195,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ 
-2327,6 +2369,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2462,6 +2507,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2634,6 +2682,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2807,6 +2858,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2965,6 +3019,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -3103,6 +3160,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -3241,6 +3301,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -3424,6 +3487,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1100 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -3443,6 +3509,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -3616,6 +3685,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -3635,6 +3707,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value 
expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -3807,6 +3882,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -4069,6 +4147,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -4435,6 +4516,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 6 Execution mode: vectorized, llap Reduce Vectorization: @@ -5650,6 +5734,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out index 228bd9d..d0fecff 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out @@ -180,6 +180,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -414,6 +417,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -648,6 +654,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -956,6 +965,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1248,6 +1260,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 
(type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1511,6 +1526,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vectorized_join46.q.out ql/src/test/results/clientpositive/llap/vectorized_join46.q.out index a134b19..406823f 100644 --- ql/src/test/results/clientpositive/llap/vectorized_join46.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_join46.q.out @@ -19,6 +19,7 @@ POSTHOOK: Output: default@test1 POSTHOOK: Lineage: test1.col_1 SCRIPT [] POSTHOOK: Lineage: test1.key SCRIPT [] POSTHOOK: Lineage: test1.value SCRIPT [] +col1 col2 col3 PREHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -40,16 +41,22 @@ POSTHOOK: Output: default@test2 POSTHOOK: Lineage: test2.col_2 SCRIPT [] POSTHOOK: Lineage: test2.key SCRIPT [] POSTHOOK: Lineage: test2.value SCRIPT [] -PREHOOK: query: EXPLAIN +col1 col2 col3 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -67,9 +74,16 @@ STAGE PLANS: TableScan alias: test1 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -77,12 +91,26 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:int, 1:int, 2:string + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 1 -> 5 + projectedOutput: 0:int, 1:int, 2:string, 4:int, 5:int, 6:string + smallTableValueMapping: 4:int, 6:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: 
VectorFileSinkOperator + native: false Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -90,23 +118,68 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] Map 2 Map Operator Tree: TableScan alias: test2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -128,6 +201,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob NULL NULL NULL 101 2 Car 102 2 Del 101 2 Car 103 2 Ema @@ -136,20 +210,25 @@ POSTHOOK: Input: default@test2 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema NULL NULL None NULL NULL NULL -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value AND test1.key between 100 and 102 AND test2.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 
ON (test1.value=test2.value AND test1.key between 100 and 102 AND test2.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -167,9 +246,16 @@ STAGE PLANS: TableScan alias: test1 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -180,12 +266,27 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + bigTableFilterExpressions: FilterLongColumnBetween(col 0:int, left 100, right 102) + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:int, 1:int, 2:string + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 1 -> 5 + projectedOutput: 0:int, 1:int, 2:string, 4:int, 5:int, 6:string + smallTableValueMapping: 4:int, 6:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 Statistics: Num rows: 6 Data size: 857 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 857 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -193,26 +294,75 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] Map 2 Map Operator Tree: TableScan alias: test2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColumnBetween(col 0:int, left 100, right 102) predicate: key BETWEEN 100 AND 102 (type: boolean) Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: 
COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:string Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -238,6 +388,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob NULL NULL NULL 101 2 Car 102 2 Del 98 NULL None NULL NULL NULL @@ -245,18 +396,23 @@ POSTHOOK: Input: default@test2 99 2 Mat NULL NULL NULL NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[11][bigTable=?] 
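The vectorized_join46.q.out updates above show the new Map Join Vectorization detail for a keyed LEFT OUTER JOIN: the native VectorMapJoinOuterLongOperator is selected, and the added fields (bigTableKeyColumns, outerSmallTableKeyMapping, smallTableValueMapping, hashTableImplementationType: OPTIMIZED) spell out exactly how big-table and small-table columns are mapped into the projected output. The key-less variant just below takes the other branch: nativeConditionsNotMet reports "Outer Join has keys IS false" and the non-native VectorMapJoinOuterFilteredOperator is used instead. Both plan shapes come straight from the two queries already in this test file:

    -- Keyed outer join: native VectorMapJoinOuterLongOperator.
    EXPLAIN VECTORIZATION DETAIL
    SELECT * FROM test1 LEFT OUTER JOIN test2
    ON (test1.value = test2.value);

    -- Key-less outer join (a cross product): non-native
    -- VectorMapJoinOuterFilteredOperator, since "Outer Join has keys IS false".
    EXPLAIN VECTORIZATION DETAIL
    SELECT * FROM test1 LEFT OUTER JOIN test2
    ON (test1.key BETWEEN 100 AND 102 AND test2.key BETWEEN 100 AND 102);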
in task 'Map 1' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.key between 100 and 102 AND test2.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.key between 100 and 102 AND test2.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -274,9 +430,16 @@ STAGE PLANS: TableScan alias: test1 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -287,12 +450,21 @@ STAGE PLANS: keys: 0 1 + Map Join Vectorization: + bigTableValueExpressions: col 0:int, col 1:int, col 2:string + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: Outer Join has keys IS false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 Statistics: Num rows: 6 Data size: 1142 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 1142 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -300,24 +472,72 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] Map 2 Map Operator Tree: TableScan alias: test2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColumnBetween(col 0:int, left 100, right 102) predicate: key BETWEEN 100 AND 102 (type: boolean) Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: 
COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -342,22 +562,28 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 101 2 Car 102 2 Del 98 NULL None NULL NULL NULL 99 0 Alice NULL NULL NULL 99 2 Mat NULL NULL NULL NULL NULL None NULL NULL NULL -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value AND true) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value AND true) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -375,26 +601,62 @@ STAGE PLANS: TableScan alias: test1 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 
0:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan alias: test2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -402,12 +664,26 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:int, 1:int, 2:string + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 1 -> 5 + projectedOutput: 4:int, 5:int, 6:string, 0:int, 1:int, 2:string + smallTableValueMapping: 4:int, 6:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 0 Map 1 Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -415,6 +691,22 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] Stage: Stage-0 Fetch Operator @@ -436,6 +728,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key 
test1.value test1.col_1 test2.key test2.value test2.col_2 101 2 Car 102 2 Del 101 2 Car 103 2 Ema 99 2 Mat 102 2 Del @@ -443,16 +736,21 @@ POSTHOOK: Input: default@test2 NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -470,9 +768,16 @@ STAGE PLANS: TableScan alias: test1 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -483,12 +788,21 @@ STAGE PLANS: keys: 0 1 + Map Join Vectorization: + bigTableValueExpressions: col 0:int, col 1:int, col 2:string + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: Outer Join has keys IS false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -496,21 +810,65 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] Map 2 Map Operator Tree: TableScan alias: test2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) 
outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -533,6 +891,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -546,20 +905,25 @@ POSTHOOK: Input: default@test2 99 2 Mat NULL NULL NULL NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 1' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102 OR test2.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102 OR test2.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -601,21 +965,55 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false Map 2 Map Operator Tree: TableScan alias: test2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -642,6 +1040,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -656,18 +1055,23 @@ POSTHOOK: Input: default@test2 99 2 Mat 103 2 Ema NULL NULL None 102 2 Del Warning: Map Join MAPJOIN[9][bigTable=?] 
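The OR-ed join conditions in the hunks around this point go one step further than the key-less case: with a non-equi predicate the map-join operator cannot be vectorized at all, so Map 1 now reports "notVectorizedReason: MAPJOIN operator: Non-equi joins not supported" and "vectorized: false", while the small-table side (Map 2) still vectorizes fully through VectorReduceSinkEmptyKeyOperator. A minimal sketch, reusing one of the queries from this file:

    -- Non-equi (OR-ed) join condition: the big-table map vertex is not
    -- vectorized; only the small-table side stays vectorized.
    EXPLAIN VECTORIZATION DETAIL
    SELECT * FROM test1 LEFT OUTER JOIN test2
    ON (test1.value = test2.value OR test1.key BETWEEN 100 AND 102);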
in task 'Map 1' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -709,21 +1113,55 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false Map 2 Map Operator Tree: TableScan alias: test2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -748,6 +1186,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -762,18 +1201,23 @@ POSTHOOK: Input: default@test2 99 2 Mat 103 2 Ema NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 1' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value OR test2.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value OR test2.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -815,21 +1259,55 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false Map 2 Map Operator Tree: TableScan alias: test2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -854,6 +1332,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 101 2 Car 102 2 Del 101 2 Car 103 2 Ema @@ -862,20 +1341,25 @@ POSTHOOK: Input: default@test2 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema NULL NULL None 102 2 Del -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value AND (test1.key between 100 and 102 OR test2.key between 100 and 102)) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT 
OUTER JOIN test2 ON (test1.value=test2.value AND (test1.key between 100 and 102 OR test2.key between 100 and 102)) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -917,23 +1401,58 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false Map 2 Map Operator Tree: TableScan alias: test2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -959,6 +1478,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob NULL NULL NULL 101 2 Car 102 2 Del 101 2 Car 103 2 Ema @@ -967,20 +1487,25 @@ POSTHOOK: Input: default@test2 99 2 Mat 102 2 Del NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 2' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102 OR test2.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102 OR test2.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -998,16 +1523,44 @@ STAGE PLANS: TableScan alias: test1 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -1037,6 +1590,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -1063,6 +1622,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -1077,18 +1637,23 @@ POSTHOOK: Input: default@test2 99 2 Mat 103 2 Ema NULL NULL None 102 2 Del Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 2' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1106,16 +1671,44 @@ STAGE PLANS: TableScan alias: test1 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -1145,6 +1738,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -1169,6 +1768,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -1180,18 +1780,23 @@ POSTHOOK: Input: default@test2 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 2' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value OR test2.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value OR test2.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1209,16 +1814,44 @@ STAGE PLANS: TableScan alias: test1 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -1248,6 +1881,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -1272,6 +1911,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 101 2 Car 102 2 Del 101 2 Car 103 2 Ema @@ -1282,20 +1922,25 @@ POSTHOOK: Input: default@test2 NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None NULL NULL None 102 2 Del -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value AND (test1.key between 100 and 102 OR test2.key between 100 and 102)) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN 
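
Every query in this golden file was switched from plain EXPLAIN to EXPLAIN VECTORIZATION DETAIL, which is why each hunk adds a PLAN VECTORIZATION header plus per-vertex Map/Reduce vectorization detail. The map-join vertex consistently reports "notVectorizedReason: MAPJOIN operator: Non-equi joins not supported" because the OR'd BETWEEN predicates make these joins non-equi. A minimal sketch of the pattern, assuming the test1/test2 tables created earlier in this q file (key int, value int, col_1/col_2 string); the SET line is illustrative session state, not part of the patch:

    -- Hedged sketch of the q-file change behind these hunks.
    -- test1/test2 are the small test tables defined earlier in this test.
    SET hive.vectorized.execution.enabled=true;
    EXPLAIN VECTORIZATION DETAIL
    SELECT *
    FROM test1
    RIGHT OUTER JOIN test2
      ON (test1.value = test2.value OR test2.key BETWEEN 100 AND 102);
    -- Expected in the map-join vertex (the non-equi side):
    --   notVectorizedReason: MAPJOIN operator: Non-equi joins not supported
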
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value AND (test1.key between 100 and 102 OR test2.key between 100 and 102)) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1313,18 +1958,47 @@ STAGE PLANS: TableScan alias: test1 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -1354,6 +2028,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false Stage: Stage-0 Fetch Operator @@ -1379,26 +2059,32 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 101 2 Car 102 2 Del 101 2 Car 103 2 Ema 99 2 Mat 102 2 Del NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102 OR test2.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test1.key 
between 100 and 102 OR test2.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1416,37 +2102,93 @@ STAGE PLANS: TableScan alias: test1 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan alias: test2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + 
inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1460,6 +2202,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -1486,6 +2231,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -1500,18 +2246,23 @@ POSTHOOK: Input: default@test2 99 2 Mat 103 2 Ema NULL NULL None 102 2 Del Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1529,37 +2280,93 @@ STAGE PLANS: TableScan alias: test1 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: 
[0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan alias: test2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1573,6 +2380,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -1597,6 +2407,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -1611,18 +2422,23 @@ POSTHOOK: Input: default@test2 99 2 Mat 103 2 Ema NULL NULL None NULL NULL NULL Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test2.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test2.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1640,37 +2456,93 @@ STAGE PLANS: TableScan alias: test1 Statistics: Num 
rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan alias: test2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: 
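
The hunk below carries the semantic core of this change: the reduce-side join's condition map now prints "Full Outer Join 0 to 1" instead of the generic "Outer Join 0 to 1", and the new "MergeJoin Vectorization" block records that merge joins are still not vectorized ("Vectorizing MergeJoin Supported IS false"), so FULL OUTER shuffle joins keep running that operator in row mode. A hedged sketch that produces this plan shape, again assuming the test1/test2 tables from this q file; forcing the shuffle-join path via hive.auto.convert.join=false is an illustrative choice, not something the patch does:

    -- Hedged sketch: a reduce-side FULL OUTER JOIN whose plan shows
    -- "Full Outer Join 0 to 1" under Merge Join Operator.
    SET hive.vectorized.execution.enabled=true;
    SET hive.auto.convert.join=false;  -- keep the join in the reducer
    EXPLAIN VECTORIZATION DETAIL
    SELECT *
    FROM test1
    FULL OUTER JOIN test2
      ON (test1.value = test2.value);
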
Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1684,6 +2556,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -1708,6 +2583,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 101 2 Car 102 2 Del 101 2 Car 103 2 Ema @@ -1718,20 +2594,25 @@ POSTHOOK: Input: default@test2 NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None NULL NULL None 102 2 Del -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value AND (test1.key between 100 and 102 OR test2.key between 100 and 102)) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value AND (test1.key between 100 and 102 OR test2.key between 100 and 102)) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1749,41 +2630,99 @@ STAGE PLANS: TableScan alias: test1 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan alias: test2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: 
+ native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) @@ -1797,6 +2736,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -1822,6 +2764,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob NULL NULL NULL 101 2 Car 102 2 Del 101 2 Car 103 2 Ema diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out index 0dff57a..e8b41ce 100644 --- ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out @@ -55,6 +55,7 @@ STAGE PLANS: className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1 input vertices: 1 Map 3 diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out index f05e5c0..c1a41b6 100644 --- 
ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out @@ -145,13 +145,15 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:decimal(8,1) className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:decimal(8,1) + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Reducer 3 @@ -215,10 +217,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:int Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: vectorized, llap @@ -279,10 +280,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 @@ -348,13 +348,15 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:decimal(8,1) className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:decimal(8,1) + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Reducer 3 @@ -418,10 +420,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:int Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE value 
expressions: _col0 (type: int) Execution mode: vectorized, llap @@ -482,10 +483,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 @@ -551,13 +551,15 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:decimal(8,1) className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:decimal(8,1) + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Reducer 3 @@ -621,10 +623,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:int Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: vectorized, llap @@ -685,10 +686,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out index ccf9aae..4fc70b9 100644 --- ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out @@ -155,11 +155,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 
7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -376,10 +376,10 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 5] + valueColumns: 1:string, 2:string, 5:int Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) Execution mode: vectorized, llap @@ -420,10 +420,9 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -459,6 +458,9 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Vectorization: @@ -628,11 +630,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -787,11 +789,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -1006,11 +1008,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink 
Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -1228,11 +1230,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -1452,11 +1454,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [0, 3, 4, 5, 6, 7, 8] + partitionColumns: 2:string + valueColumns: 0:int, 3:string, 4:string, 5:int, 6:string, 7:double, 8:string Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) Execution mode: vectorized, llap @@ -1497,10 +1499,9 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1572,6 +1573,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -1675,10 +1679,9 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: 
true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1711,11 +1714,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [0, 3, 4, 5, 6, 7, 8] + partitionColumns: 2:string + valueColumns: 0:int, 3:string, 4:string, 5:int, 6:string, 7:double, 8:string Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) Execution mode: vectorized, llap @@ -1757,6 +1760,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: llap Reduce Vectorization: @@ -2295,11 +2301,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -2513,11 +2519,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -2801,11 +2807,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + 
keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -3023,11 +3029,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [0, 5, 7] + partitionColumns: 2:string + valueColumns: 0:int, 5:int, 7:double Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -3068,10 +3074,9 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -3142,6 +3147,9 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 27 Data size: 6237 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: llap Reduce Vectorization: @@ -3316,11 +3324,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -3554,11 +3562,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS 
true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: double) Execution mode: vectorized, llap @@ -3817,11 +3825,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -4334,10 +4342,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 5] + valueColumns: 1:string, 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_size (type: int) Execution mode: vectorized, llap @@ -4652,10 +4660,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 5] + valueColumns: 1:string, 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_size (type: int) Execution mode: vectorized, llap @@ -4966,10 +4974,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5] + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -5288,10 +5296,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5] + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -5621,10 +5629,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5] + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -5923,10 +5931,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5] + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out index 72216d3..e075bf7 100644 --- ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out @@ -132,6 +132,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out index 3fb968f..3b36498 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out @@ -155,10 +155,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:timestamp, 1:timestamp Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: timestamp), _col1 (type: timestamp) Execution mode: vectorized, llap @@ 
-380,10 +379,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:struct Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct) Execution mode: vectorized, llap @@ -520,10 +518,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5, 6] + valueColumns: 0:struct, 1:struct, 2:struct, 3:struct, 4:struct, 5:struct, 6:struct Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/mapjoin46.q.out ql/src/test/results/clientpositive/mapjoin46.q.out index 61b579a..5a589cc 100644 --- ql/src/test/results/clientpositive/mapjoin46.q.out +++ ql/src/test/results/clientpositive/mapjoin46.q.out @@ -124,14 +124,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del +101 2 Car 103 2 Ema 98 NULL None NULL NULL NULL 99 0 Alice NULL NULL NULL 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema -100 1 Bob NULL NULL NULL -101 2 Car 102 2 Del -101 2 Car 103 2 Ema +NULL NULL None NULL NULL NULL PREHOOK: query: EXPLAIN SELECT * FROM test1 LEFT OUTER JOIN test2 @@ -233,12 +233,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del 98 NULL None NULL NULL NULL 99 0 Alice NULL NULL NULL 99 2 Mat NULL NULL NULL -100 1 Bob NULL NULL NULL -101 2 Car 102 2 Del +NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -338,12 +338,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL +100 1 Bob 102 2 Del +101 2 Car 102 2 Del 98 NULL None NULL NULL NULL 99 0 Alice NULL NULL NULL 99 2 Mat NULL NULL NULL -100 1 Bob 102 2 Del -101 2 Car 102 2 Del +NULL NULL None NULL NULL NULL PREHOOK: query: EXPLAIN SELECT * FROM test1 RIGHT OUTER JOIN test2 @@ -428,10 +428,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -99 2 Mat 102 2 Del 101 2 Car 102 2 Del -99 2 Mat 103 2 Ema 101 2 Car 103 2 Ema +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product @@ -526,10 +526,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL -98 NULL None NULL NULL NULL -99 0 Alice NULL NULL NULL -99 2 Mat NULL NULL NULL 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -538,6 +534,10 @@ NULL NULL None NULL NULL NULL 101 2 Car 103 2 Ema 101 2 Car 104 3 Fli 101 2 Car 105 NULL None +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat NULL NULL NULL +NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -633,11 +633,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None 102 2 Del -98 NULL None 102 2 Del -99 0 Alice 102 2 Del -99 2 Mat 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -646,6 +641,11 @@ NULL NULL None 102 2 Del 101 2 Car 103 2 Ema 101 2 Car 104 3 Fli 101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -737,11 +737,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL -98 NULL None NULL NULL NULL -99 0 Alice NULL NULL NULL -99 2 Mat 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -750,6 +745,11 @@ NULL NULL None NULL NULL NULL 101 2 Car 103 2 Ema 101 2 Car 104 3 Fli 101 2 Car 105 NULL None +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -841,14 +841,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None 102 2 Del +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema 98 NULL None 102 2 Del 99 0 Alice 102 2 Del 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema -100 1 Bob 102 2 Del -101 2 Car 102 2 Del -101 2 Car 103 2 Ema +NULL NULL None 102 2 Del PREHOOK: query: EXPLAIN SELECT * FROM test1 LEFT OUTER JOIN test2 @@ -942,13 +942,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL -98 NULL None NULL NULL NULL -99 0 Alice NULL NULL NULL -99 2 Mat 102 2 Del 100 1 Bob NULL NULL NULL 101 2 Car 102 2 Del 101 2 Car 103 2 Ema +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -1044,19 +1044,19 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None 102 2 Del -98 NULL None 102 2 Del -99 0 Alice 102 2 Del -99 2 Mat 102 2 Del 100 1 Bob 102 2 Del -101 2 Car 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 103 2 Ema -101 2 Car 103 2 Ema 100 1 Bob 104 3 Fli -101 2 Car 104 3 Fli 100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli 101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -1148,16 +1148,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -99 2 Mat 102 2 Del 100 1 Bob 102 2 Del -101 2 Car 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 103 2 Ema -101 2 Car 103 2 Ema 100 1 Bob 104 3 Fli -101 2 Car 104 3 Fli 100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli 101 2 Car 105 NULL None +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -1249,16 +1249,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None 102 2 Del +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema 98 NULL None 102 2 Del 99 0 Alice 102 2 Del 99 2 Mat 102 2 Del -100 1 Bob 102 2 Del -101 2 Car 102 2 Del 99 2 Mat 103 2 Ema -101 2 Car 103 2 Ema NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None +NULL NULL None 102 2 Del PREHOOK: query: EXPLAIN SELECT * FROM test1 RIGHT OUTER JOIN test2 @@ -1352,9 +1352,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -99 2 Mat 102 2 Del 101 2 Car 102 2 Del 101 2 Car 103 2 Ema +99 2 Mat 102 2 Del NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product @@ -1405,7 +1405,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1445,31 +1445,33 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -101 2 Car 105 NULL None -101 2 Car 104 3 Fli -101 2 Car 103 2 Ema -101 2 Car 102 2 Del -100 1 Bob 105 NULL None -100 1 Bob 104 3 Fli -100 1 Bob 103 2 Ema 100 1 Bob 102 2 Del -99 2 Mat 103 2 Ema -99 2 Mat 102 2 Del -99 0 Alice 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None 98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema NULL NULL None 102 2 Del Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test1.key between 100 and 102) + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test1.key between 100 and 102) + OR test1.key between 100 and 
102 + OR test2.key between 100 and 102) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1504,12 +1506,12 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1529,7 +1531,8 @@ Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAP PREHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test1.key between 100 and 102) + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) PREHOOK: type: QUERY PREHOOK: Input: default@test1 PREHOOK: Input: default@test2 @@ -1537,36 +1540,37 @@ PREHOOK: Input: default@test2 POSTHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test1.key between 100 and 102) + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -101 2 Car 105 NULL None -101 2 Car 104 3 Fli -101 2 Car 103 2 Ema -101 2 Car 102 2 Del -100 1 Bob 105 NULL None -100 1 Bob 104 3 Fli -100 1 Bob 103 2 Ema 100 1 Bob 102 2 Del -99 2 Mat 103 2 Ema +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del 99 2 Mat 102 2 Del -99 0 Alice NULL NULL NULL -98 NULL None NULL NULL NULL -NULL NULL None NULL NULL NULL +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test2.key between 100 and 102) + OR test1.key between 100 and 102) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test2.key between 100 and 102) + OR test1.key between 100 and 102) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1601,12 +1605,12 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1626,7 +1630,7 @@ Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAP PREHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test2.key between 100 and 102) + OR test1.key between 100 and 102) PREHOOK: type: QUERY PREHOOK: Input: default@test1 PREHOOK: Input: default@test2 @@ -1634,34 +1638,36 @@ PREHOOK: Input: default@test2 POSTHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test2.key between 100 and 102) + OR test1.key between 100 and 102) POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 
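For context on the plan above: with a disjunctive ON clause the equality test1.value=test2.value cannot be used as a join key, so the Full Outer Join is planned with empty keys (hence the cross-product warning) and the entire condition is evaluated as a residual filter predicate over each joined row. The wholesale row reordering in the result hunks here is also consistent with the output now being emitted in sorted order; the row content itself is unchanged. A minimal repro sketch, assuming the test1/test2 schemas implied by the column lists in the plans (the actual tables are created elsewhere in the test):

CREATE TABLE test1 (key INT, value INT, col_1 STRING);
CREATE TABLE test2 (key INT, value INT, col_2 STRING);
-- The OR disjunct defeats equi-key extraction: the plan keeps empty
-- join keys, warns about the cross product, and applies the whole
-- ON clause as a residual filter.
SELECT *
FROM test1 FULL OUTER JOIN test2
ON (test1.value = test2.value
    OR test1.key BETWEEN 100 AND 102);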
#### A masked pattern was here #### -101 2 Car 103 2 Ema -101 2 Car 102 2 Del 100 1 Bob 102 2 Del -99 2 Mat 103 2 Ema +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL 99 2 Mat 102 2 Del -99 0 Alice 102 2 Del -98 NULL None 102 2 Del -NULL NULL None 102 2 Del -NULL NULL NULL 105 NULL None -NULL NULL NULL 104 3 Fli +99 2 Mat 103 2 Ema +NULL NULL None NULL NULL NULL +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) + OR test1.key between 100 and 102) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) + OR test1.key between 100 and 102) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1679,11 +1685,9 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + sort order: Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) TableScan alias: test2 Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE @@ -1692,24 +1696,22 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + sort order: Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col1 (type: int) + 0 + 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1721,11 +1723,11 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) + OR test1.key between 100 and 102) PREHOOK: type: QUERY PREHOOK: Input: 
default@test1 PREHOOK: Input: default@test2 @@ -1733,37 +1735,426 @@ PREHOOK: Input: default@test2 POSTHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) + OR test1.key between 100 and 102) POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +100 1 Bob 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None 98 NULL None NULL NULL NULL -NULL NULL None NULL NULL NULL -NULL NULL NULL 105 NULL None 99 0 Alice NULL NULL NULL -100 1 Bob NULL NULL NULL -101 2 Car 103 2 Ema -101 2 Car 102 2 Del 99 2 Mat 102 2 Del -NULL NULL NULL 104 3 Fli -Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +99 2 Mat 103 2 Ema +NULL NULL None NULL NULL NULL +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * -FROM ( - SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, - test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 - FROM test1 RIGHT OUTER JOIN test2 - ON (test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) - ) sq1 -FULL OUTER JOIN ( - SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, - test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 - FROM test1 LEFT OUTER JOIN test2 - ON (test1.value=test2.value +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: 
NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None 102 2 Del +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Stage-1:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None 102 2 Del +PREHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### 
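For reference, the _colN names in the residual predicates follow the join's flattened output schema: test1 contributes _col0.._col2 (key, value, col_1) and test2 contributes _col3.._col5 (key, value, col_2), so test1.value=test2.value prints as (_col1 = _col4) and test2.key as _col3. A hedged illustration, using the same query text as the q.out above:

EXPLAIN
SELECT *
FROM test1 FULL OUTER JOIN test2
ON (test1.value = test2.value
    OR test2.key BETWEEN 100 AND 102);
-- The plan above renders this ON clause as:
--   residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)}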
+POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None NULL NULL NULL +PREHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 
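The AND variant, whose plan appears above and whose results follow, behaves differently: test1.value=test2.value is a top-level conjunct of the ON clause, so it survives as the equi-join key and only the BETWEEN disjunction remains as a residual filter; accordingly, no cross-product warning is emitted for it. A sketch of the contrast (same assumed schemas as in the earlier note):

-- The equality is ANDed with the rest, so it stays the join key:
SELECT *
FROM test1 FULL OUTER JOIN test2
ON (test1.value = test2.value
    AND (test1.key BETWEEN 100 AND 102
         OR test2.key BETWEEN 100 AND 102));
-- Plan: keys 0/1 = _col1 (type: int), residual filter
--   {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)}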
+#### A masked pattern was here #### +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None NULL NULL NULL +Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value AND (test1.key between 100 and 102 OR test2.key between 100 and 102)) ) sq2 @@ -1878,7 +2269,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1944,23 +2335,239 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL NULL 105 NULL None 101 2 Car 103 2 Ema -NULL NULL NULL 105 NULL None 101 2 Car 102 2 Del -NULL NULL NULL 105 NULL None 100 1 Bob NULL NULL NULL -NULL NULL NULL 105 NULL None 99 2 Mat 102 2 Del -NULL NULL NULL 105 NULL None 99 0 Alice NULL NULL NULL -NULL NULL NULL 105 NULL None 98 NULL None NULL NULL NULL -NULL NULL NULL 105 NULL None NULL NULL None NULL NULL NULL -NULL NULL NULL 104 3 Fli 101 2 Car 103 2 Ema -NULL NULL NULL 104 3 Fli 101 2 Car 102 2 Del +101 2 Car 102 2 Del 100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del 99 0 Alice NULL NULL NULL +101 2 Car 103 2 Ema 100 1 Bob NULL NULL NULL +101 2 Car 103 2 Ema 99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del 100 1 Bob NULL NULL NULL +99 2 Mat 102 2 Del 99 0 Alice NULL NULL NULL NULL NULL NULL 104 3 Fli 100 1 Bob NULL NULL NULL -NULL NULL NULL 104 3 Fli 99 2 Mat 102 2 Del -NULL NULL NULL 104 3 Fli 99 0 Alice NULL NULL NULL +NULL NULL NULL 104 3 Fli 101 2 Car 102 2 Del +NULL NULL NULL 104 3 Fli 101 2 Car 103 2 Ema NULL NULL NULL 104 3 Fli 98 NULL None NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 0 Alice NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 2 Mat 102 2 Del NULL NULL NULL 104 3 Fli NULL NULL None NULL NULL NULL -101 2 Car 103 2 Ema 100 1 Bob NULL NULL NULL -101 2 Car 103 2 Ema 99 0 Alice NULL NULL NULL +NULL NULL NULL 105 NULL None 100 1 Bob NULL NULL NULL +NULL NULL NULL 105 NULL None 101 2 Car 102 2 Del +NULL NULL NULL 105 NULL None 101 2 Car 103 2 Ema +NULL NULL NULL 105 NULL None 98 NULL None NULL NULL NULL +NULL NULL NULL 105 NULL None 99 0 Alice NULL NULL NULL +NULL NULL NULL 105 NULL None 99 2 Mat 102 2 Del +NULL NULL NULL 105 NULL None NULL NULL None NULL NULL NULL +Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS 
col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-8 is a root stage + Stage-2 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:test1 + Fetch Operator + limit: -1 + $hdt$_2:$hdt$_3:test2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:test1 + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + $hdt$_2:$hdt$_3:test2 + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 
1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + residual filter predicates: {(_col1 is null or (_col10 is null and (_col7 <> _col4)))} + Statistics: Num rows: 36 Data size: 768 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 768 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### 101 2 Car 102 2 Del 100 1 Bob NULL NULL NULL 101 2 Car 102 2 Del 99 0 Alice NULL NULL NULL +101 2 Car 103 2 Ema 100 1 Bob NULL NULL NULL +101 2 Car 103 2 Ema 99 0 Alice NULL NULL NULL 99 2 Mat 102 2 Del 100 1 Bob NULL NULL NULL 99 2 Mat 102 2 Del 99 0 Alice NULL NULL NULL +NULL NULL NULL 104 3 Fli 100 1 Bob NULL NULL NULL +NULL NULL NULL 104 3 Fli 101 2 Car 102 2 Del +NULL NULL NULL 104 3 Fli 101 2 Car 103 2 Ema +NULL NULL NULL 104 3 Fli 98 NULL 
None NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 0 Alice NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli NULL NULL None NULL NULL NULL +NULL NULL NULL 105 NULL None 100 1 Bob NULL NULL NULL +NULL NULL NULL 105 NULL None 101 2 Car 102 2 Del +NULL NULL NULL 105 NULL None 101 2 Car 103 2 Ema +NULL NULL NULL 105 NULL None 98 NULL None NULL NULL NULL +NULL NULL NULL 105 NULL None 99 0 Alice NULL NULL NULL +NULL NULL NULL 105 NULL None 99 2 Mat 102 2 Del +NULL NULL NULL 105 NULL None NULL NULL None NULL NULL NULL diff --git ql/src/test/results/clientpositive/mapjoin47.q.out ql/src/test/results/clientpositive/mapjoin47.q.out index af7f20f..5569370 100644 --- ql/src/test/results/clientpositive/mapjoin47.q.out +++ ql/src/test/results/clientpositive/mapjoin47.q.out @@ -1405,7 +1405,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1511,7 +1511,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/mergejoin.q.out ql/src/test/results/clientpositive/mergejoin.q.out index 664becb..172a64e 100644 --- ql/src/test/results/clientpositive/mergejoin.q.out +++ ql/src/test/results/clientpositive/mergejoin.q.out @@ -1706,7 +1706,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) diff --git ql/src/test/results/clientpositive/mergejoins_mixed.q.out ql/src/test/results/clientpositive/mergejoins_mixed.q.out index a5cc23a..b70c909 100644 --- ql/src/test/results/clientpositive/mergejoins_mixed.q.out +++ ql/src/test/results/clientpositive/mergejoins_mixed.q.out @@ -802,7 +802,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col3 (type: string) 1 _col0 (type: string) @@ -841,7 +841,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1139,7 +1139,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1437,7 +1437,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/optional_outer.q.out ql/src/test/results/clientpositive/optional_outer.q.out index 9ec1af7..efc952c 100644 --- ql/src/test/results/clientpositive/optional_outer.q.out +++ ql/src/test/results/clientpositive/optional_outer.q.out @@ -283,7 +283,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -344,7 +344,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/perf/spark/query51.q.out ql/src/test/results/clientpositive/perf/spark/query51.q.out index 4a13589..49c8240 100644 --- ql/src/test/results/clientpositive/perf/spark/query51.q.out +++ ql/src/test/results/clientpositive/perf/spark/query51.q.out @@ -263,7 +263,7 @@ 
STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: int), _col1 (type: string) 1 _col0 (type: int), _col1 (type: string) diff --git ql/src/test/results/clientpositive/perf/spark/query97.q.out ql/src/test/results/clientpositive/perf/spark/query97.q.out index 14a2aed..6c5af53 100644 --- ql/src/test/results/clientpositive/perf/spark/query97.q.out +++ ql/src/test/results/clientpositive/perf/spark/query97.q.out @@ -192,7 +192,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: int), _col1 (type: int) 1 _col0 (type: int), _col1 (type: int) diff --git ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out index 214f635..a6ad430 100644 --- ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out +++ ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out @@ -222,7 +222,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -297,7 +297,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 330 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/skewjoinopt3.q.out ql/src/test/results/clientpositive/skewjoinopt3.q.out index 0730cb1..50b25f2 100644 --- ql/src/test/results/clientpositive/skewjoinopt3.q.out +++ ql/src/test/results/clientpositive/skewjoinopt3.q.out @@ -245,7 +245,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -320,7 +320,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 330 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/smb_mapjoin_1.q.out ql/src/test/results/clientpositive/smb_mapjoin_1.q.out index 1182e56..a8b3f9d 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_1.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_1.q.out @@ -270,7 +270,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) @@ -491,7 +491,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/smb_mapjoin_2.q.out ql/src/test/results/clientpositive/smb_mapjoin_2.q.out index 1dfacda..5d4468b 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_2.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_2.q.out @@ -231,7 +231,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) @@ -456,7 +456,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2080 Basic stats: 
COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/smb_mapjoin_3.q.out ql/src/test/results/clientpositive/smb_mapjoin_3.q.out index cf4c744..1a0a0cf 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_3.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_3.q.out @@ -230,7 +230,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) @@ -453,7 +453,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/smb_mapjoin_46.q.out ql/src/test/results/clientpositive/smb_mapjoin_46.q.out index 1302360..302f00b 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_46.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_46.q.out @@ -1316,7 +1316,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1415,7 +1415,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1512,7 +1512,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1611,7 +1611,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) diff --git ql/src/test/results/clientpositive/smb_mapjoin_47.q.out ql/src/test/results/clientpositive/smb_mapjoin_47.q.out index c7334a8..825cd0e 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_47.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_47.q.out @@ -1330,7 +1330,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) @@ -1436,7 +1436,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) diff --git ql/src/test/results/clientpositive/smb_mapjoin_7.q.out ql/src/test/results/clientpositive/smb_mapjoin_7.q.out index 83033b0..ef5cca6 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_7.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_7.q.out @@ -633,7 +633,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out index 118a48e..217a72d 100644 --- ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out +++ ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out @@ -968,7 +968,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) diff --git 
ql/src/test/results/clientpositive/spark/auto_join18.q.out ql/src/test/results/clientpositive/spark/auto_join18.q.out index 1b6cc08..5cb783a 100644 --- ql/src/test/results/clientpositive/spark/auto_join18.q.out +++ ql/src/test/results/clientpositive/spark/auto_join18.q.out @@ -96,7 +96,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out index 17f0d88..e14f1a6 100644 --- ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out +++ ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out @@ -99,7 +99,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/auto_join6.q.out ql/src/test/results/clientpositive/spark/auto_join6.q.out index 67bdc58..899f21a 100644 --- ql/src/test/results/clientpositive/spark/auto_join6.q.out +++ ql/src/test/results/clientpositive/spark/auto_join6.q.out @@ -88,7 +88,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/auto_join7.q.out ql/src/test/results/clientpositive/spark/auto_join7.q.out index a410966..dde9608 100644 --- ql/src/test/results/clientpositive/spark/auto_join7.q.out +++ ql/src/test/results/clientpositive/spark/auto_join7.q.out @@ -116,7 +116,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 0 to 2 keys: 0 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/auto_join_filters.q.out ql/src/test/results/clientpositive/spark/auto_join_filters.q.out index 8ae5a0e..6a99f8b 100644 --- ql/src/test/results/clientpositive/spark/auto_join_filters.q.out +++ ql/src/test/results/clientpositive/spark/auto_join_filters.q.out @@ -54,6 +54,106 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4939870 +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + 
sort order: + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + filter predicates: + 0 {(VALUE._col0 > 40)} {(VALUE._col1 > 50)} {(VALUE._col0 = VALUE._col1)} + 1 {(VALUE._col0 > 40)} {(VALUE._col1 > 50)} {(VALUE._col0 = VALUE._col1)} + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 441 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 441 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -198,6 +298,135 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4939870 +PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value 
+PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + filter predicates: + 0 {(KEY.reducesinkkey0 > 40)} {(VALUE._col0 > 50)} {(KEY.reducesinkkey0 = VALUE._col0)} + 1 {(VALUE._col0 > 40)} {(KEY.reducesinkkey0 > 50)} {(VALUE._col0 = KEY.reducesinkkey0)} + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = 
b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -340,6 +569,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4939870 +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = 
a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -484,6 +723,135 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4939870 +PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 220 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + filter predicates: + 0 {(KEY.reducesinkkey0 > 40)} {(VALUE._col0 > 50)} {(KEY.reducesinkkey0 = VALUE._col0)} + 1 {(VALUE._col0 > 40)} {(KEY.reducesinkkey0 > 50)} {(VALUE._col0 = KEY.reducesinkkey0)} + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator 
+ aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = 
c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out index 32a885b..958e812 100644 --- ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out +++ ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out @@ -188,6 +188,132 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4543526 +PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4543526 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4543526 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4543526 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4543526 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value) PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git ql/src/test/results/clientpositive/spark/join18.q.out ql/src/test/results/clientpositive/spark/join18.q.out index 1a949b2..cbb0d65 100644 --- ql/src/test/results/clientpositive/spark/join18.q.out +++ ql/src/test/results/clientpositive/spark/join18.q.out @@ -95,7 +95,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out index d0ae0ba..4e0b290 100644 --- ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out +++ ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out @@ -98,7 +98,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/join6.q.out ql/src/test/results/clientpositive/spark/join6.q.out index 3f884ca..eecacea 100644 --- ql/src/test/results/clientpositive/spark/join6.q.out +++ ql/src/test/results/clientpositive/spark/join6.q.out @@ -88,7 +88,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer 
Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/join7.q.out ql/src/test/results/clientpositive/spark/join7.q.out index d43c5cd..0012b41 100644 --- ql/src/test/results/clientpositive/spark/join7.q.out +++ ql/src/test/results/clientpositive/spark/join7.q.out @@ -116,7 +116,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 0 to 2 keys: 0 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out index 9b8b69c..2410c98 100644 --- ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out +++ ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out @@ -1227,7 +1227,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 1 to 2 Left Outer Join 0 to 3 filter mappings: diff --git ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out index c53cd00..681ed0d 100644 --- ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out +++ ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out @@ -849,7 +849,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col3 (type: string) 1 _col0 (type: string) @@ -865,7 +865,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1147,7 +1147,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1429,7 +1429,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out index f0318a3..3e4a325 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out @@ -62,10 +62,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:tinyint, 1:tinyint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized @@ -117,10 +116,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - 
valueColumnNums: [1, 2, 3] + valueColumns: 1:tinyint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -241,10 +240,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -296,10 +294,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized @@ -562,10 +559,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:bigint, 1:bigint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized @@ -617,10 +613,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:bigint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -741,10 +737,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -796,10 +791,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 
0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized @@ -1062,10 +1056,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:float, 1:float, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized @@ -1117,10 +1110,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:float, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -1241,10 +1234,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:double Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) Execution mode: vectorized @@ -1296,10 +1288,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized @@ -1607,10 +1598,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] + valueColumns: 0:struct, 1:struct, 2:struct, 3:bigint, 4:double, 5:tinyint Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE 
Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: bigint), _col4 (type: double), _col5 (type: tinyint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out index 18379f2..173835f 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out @@ -94,10 +94,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] + valueColumns: 0:struct, 1:double, 2:tinyint, 3:int, 4:struct, 5:bigint Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: tinyint), _col3 (type: int), _col4 (type: struct), _col5 (type: bigint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out index df85ce3..aa43219 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out @@ -121,10 +121,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:double, 1:bigint, 2:string, 3:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [4, 5, 6, 7, 8] + valueColumns: 4:bigint, 5:struct, 6:struct, 7:bigint, 8:struct Statistics: Num rows: 3754 Data size: 45048 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: bigint), _col5 (type: struct), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out index e4db32c..24dc741 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out @@ -123,10 +123,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3, 4] + keyColumns: 0:boolean, 1:tinyint, 2:timestamp, 3:float, 4:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5, 6, 7, 8, 9, 10] + 
valueColumns: 5:tinyint, 6:double, 7:struct, 8:struct, 9:float, 10:tinyint Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out index aef374a..0b73789 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out @@ -124,10 +124,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3, 4] + keyColumns: 0:string, 1:float, 2:double, 3:timestamp, 4:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5, 6, 7, 8, 9, 10] + valueColumns: 5:struct, 6:float, 7:struct, 8:bigint, 9:struct, 10:struct Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out index 24cdf06..df258c3 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out @@ -119,10 +119,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3, 4, 5, 6] + keyColumns: 0:float, 1:boolean, 2:double, 3:string, 4:tinyint, 5:int, 6:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [7, 8, 9, 10, 11, 12] + valueColumns: 7:struct, 8:double, 9:struct, 10:struct, 11:struct, 12:struct Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: struct), _col8 (type: double), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out index a35c9c5..f813705 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out @@ -96,10 +96,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) Reduce Sink 
Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:double, 1:string, 2:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5] + valueColumns: 3:bigint, 4:struct, 5:double Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out index 9e9f4df..162e634 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out @@ -90,10 +90,10 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3, 4] + keyColumns: 3:bigint, 4:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [6, 2, 8, 5, 15, 16, 14, 17, 19, 20, 22, 18] + valueColumns: 6:string, 2:int, 8:timestamp, 5:double, 15:double, 16:bigint, 14:double, 17:double, 19:double, 20:double, 22:decimal(11,4), 18:double Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out index 8b3c5f2..88e18ed 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out @@ -98,10 +98,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] + valueColumns: 0:struct, 1:double, 2:struct, 3:bigint, 4:tinyint, 5:struct Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: bigint), _col4 (type: tinyint), _col5 (type: struct) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out index dd3532b..9739650 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out @@ -103,10 +103,9 @@ STAGE PLANS: sort 
order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] + valueColumns: 0:struct, 1:struct, 2:struct, 3:double, 4:struct, 5:struct Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: struct) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out index 4a7b0e0..6853f98 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out @@ -98,10 +98,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4] + valueColumns: 0:bigint, 1:struct, 2:struct, 3:struct, 4:tinyint Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: tinyint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out index 56c62c3..86bbaea 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out @@ -92,10 +92,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4] + valueColumns: 0:smallint, 1:bigint, 2:smallint, 3:bigint, 4:tinyint Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: tinyint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out index 52b8126..3ba9b61 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out @@ -96,10 +96,9 @@ STAGE PLANS: sort order: +++++++++++++++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [10, 3, 1, 0, 8, 6, 14, 15, 16, 17, 19, 20, 18, 21, 23] + keyColumns: 10:boolean, 3:bigint, 1:smallint, 0:tinyint, 8:timestamp, 6:string, 14:bigint, 15:int, 16:smallint, 17:tinyint, 
19:int, 20:bigint, 18:int, 21:tinyint, 23:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out index f76df32..99aa65c 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out @@ -92,10 +92,9 @@ STAGE PLANS: sort order: ++++++++++++++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [8, 5, 10, 6, 4, 13, 14, 15, 17, 19, 16, 18, 20, 22] + keyColumns: 8:timestamp, 5:double, 10:boolean, 6:string, 4:float, 13:double, 14:double, 15:double, 17:float, 19:double, 16:double, 18:float, 20:float, 22:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out index a35c9c5..f813705 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out @@ -96,10 +96,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:double, 1:string, 2:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5] + valueColumns: 3:bigint, 4:struct, 5:double Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out index ce188a0..ae52bc5 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out @@ -120,10 +120,10 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 5] + keyColumns: 0:tinyint, 5:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true 
- valueColumnNums: [1] + valueColumns: 1:smallint Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 value expressions: _col2 (type: smallint) @@ -276,11 +276,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [1] + partitionColumns: 0:tinyint + valueColumns: 1:struct Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: struct) @@ -436,10 +436,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 Execution mode: vectorized @@ -592,11 +591,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:tinyint, 1:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [] + partitionColumns: 0:tinyint Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -792,10 +790,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized @@ -849,10 +847,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:bigint, 0:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 Reducer 3 diff --git 
ql/src/test/results/clientpositive/spark/semijoin.q.out ql/src/test/results/clientpositive/spark/semijoin.q.out index 6932efa..34ae2bf 100644 --- ql/src/test/results/clientpositive/spark/semijoin.q.out +++ ql/src/test/results/clientpositive/spark/semijoin.q.out @@ -1857,7 +1857,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Semi Join 1 to 2 keys: 0 key (type: int) @@ -2256,7 +2256,7 @@ STAGE PLANS: Join Operator condition map: Left Semi Join 0 to 1 - Outer Join 0 to 2 + Full Outer Join 0 to 2 keys: 0 key (type: int) 1 _col0 (type: int) diff --git ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out index 3cd65b9..31ccddc 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out @@ -271,7 +271,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 330 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -285,7 +285,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 330 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out index ec10c44..9575dd9 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out @@ -291,7 +291,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) @@ -536,7 +536,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out index 0bcd167..a591f76 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out @@ -252,7 +252,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) @@ -501,7 +501,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out index ad879a8..0b4a94f 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out @@ -251,7 +251,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 
0 to 1 keys: 0 key (type: int) 1 key (type: int) @@ -498,7 +498,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out index 21171db..d2a67f9 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out @@ -542,7 +542,7 @@ STAGE PLANS: Join Operator condition map: Left Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) @@ -974,7 +974,7 @@ STAGE PLANS: Join Operator condition map: Right Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) @@ -1192,7 +1192,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 1 to 2 keys: 0 _col0 (type: int) @@ -1301,7 +1301,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Right Outer Join 1 to 2 keys: 0 _col0 (type: int) @@ -1407,8 +1407,8 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 0 to 1 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out index 348d165..18dff0e 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out @@ -542,7 +542,7 @@ STAGE PLANS: Join Operator condition map: Left Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) @@ -974,7 +974,7 @@ STAGE PLANS: Join Operator condition map: Right Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) @@ -1192,7 +1192,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 1 to 2 keys: 0 _col0 (type: int) @@ -1301,7 +1301,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Right Outer Join 1 to 2 keys: 0 _col0 (type: int) @@ -1407,8 +1407,8 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 0 to 1 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out index 610abab..1732927 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out @@ -631,7 +631,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out index 1916d25..cff06ff 100644 --- 
ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out @@ -89,10 +89,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -372,10 +371,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -424,10 +422,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -575,10 +572,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -627,10 +623,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -857,11 +852,10 @@ STAGE PLANS: Map-reduce partition columns: day(_col0) (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [5] + keyColumns: 5:int keyExpressions: VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 
5:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -910,11 +904,10 @@ STAGE PLANS: Map-reduce partition columns: day(_col0) (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [3] + keyColumns: 3:int keyExpressions: VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 3:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -1062,11 +1055,10 @@ STAGE PLANS: Map-reduce partition columns: day(_col0) (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [5] + keyColumns: 5:int keyExpressions: VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 5:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -1115,11 +1107,10 @@ STAGE PLANS: Map-reduce partition columns: day(_col0) (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [3] + keyColumns: 3:int keyExpressions: VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 3:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -1344,11 +1335,10 @@ STAGE PLANS: Map-reduce partition columns: abs(((- UDFToLong(concat(UDFToString(day(_col0)), '0'))) + 10)) (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [8] + keyColumns: 8:bigint keyExpressions: FuncAbsLongToLong(col 5:bigint)(children: LongColAddLongScalar(col 8:bigint, val 10)(children: LongColUnaryMinus(col 5:bigint)(children: CastStringToLong(col 7:string)(children: StringGroupColConcatStringScalar(col 6:string, val 0)(children: CastLongToString(col 5:int)(children: VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 5:int) -> 6:string) -> 7:string) -> 5:bigint) -> 8:bigint) -> 5:bigint) -> 8:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] 
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -1397,11 +1387,10 @@ STAGE PLANS: Map-reduce partition columns: abs(((- UDFToLong(concat(UDFToString(day(_col0)), '0'))) + 10)) (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [6] + keyColumns: 6:bigint keyExpressions: FuncAbsLongToLong(col 3:bigint)(children: LongColAddLongScalar(col 6:bigint, val 10)(children: LongColUnaryMinus(col 3:bigint)(children: CastStringToLong(col 5:string)(children: StringGroupColConcatStringScalar(col 4:string, val 0)(children: CastLongToString(col 3:int)(children: VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 3:int) -> 4:string) -> 5:string) -> 3:bigint) -> 6:bigint) -> 3:bigint) -> 6:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -1619,11 +1608,10 @@ STAGE PLANS: Map-reduce partition columns: CAST( UDFToShort(day(_col0)) AS decimal(10,0)) (type: decimal(10,0)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [6] + keyColumns: 6:decimal(10,0) keyExpressions: CastLongToDecimal(col 5:smallint)(children: col 5:int) -> 6:decimal(10,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -1672,11 +1660,10 @@ STAGE PLANS: Map-reduce partition columns: CAST( day(_col0) AS decimal(10,0)) (type: decimal(10,0)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [4] + keyColumns: 4:decimal(10,0) keyExpressions: CastLongToDecimal(col 3:int)(children: VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 3:int) -> 4:decimal(10,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -1958,10 +1945,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:string Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized @@ -2011,10 +1998,9 @@ STAGE PLANS: 
Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -2062,10 +2048,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -2240,10 +2225,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:string Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized @@ -2293,10 +2278,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -2344,10 +2328,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -2619,10 +2602,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 3] + keyColumns: 2:string, 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -2671,10 +2653,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col2 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 2] + keyColumns: 0:string, 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -2822,10 +2803,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 3] + keyColumns: 2:string, 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -2874,10 +2854,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col2 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 2] + keyColumns: 0:string, 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -3103,10 +3082,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -3155,10 +3133,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -3306,10 +3283,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: 
VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -3358,10 +3334,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -3588,11 +3563,10 @@ STAGE PLANS: Map-reduce partition columns: UDFToDouble(_col0) (type: double) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [5] + keyColumns: 5:double keyExpressions: CastStringToDouble(col 3:string) -> 5:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -3641,11 +3615,10 @@ STAGE PLANS: Map-reduce partition columns: UDFToDouble(UDFToInteger((_col0 / 2.0D))) (type: double) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [3] + keyColumns: 3:double keyExpressions: CastLongToDouble(col 4:int)(children: CastDoubleToLong(col 3:double)(children: DoubleColDivideDoubleScalar(col 0:double, val 2.0) -> 3:double) -> 4:int) -> 3:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -3862,11 +3835,10 @@ STAGE PLANS: Map-reduce partition columns: (UDFToDouble(_col0) * 2.0D) (type: double) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [6] + keyColumns: 6:double keyExpressions: DoubleColMultiplyDoubleScalar(col 5:double, val 2.0)(children: CastStringToDouble(col 3:string) -> 5:double) -> 6:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -3915,10 +3887,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double) Reduce Sink Vectorization: className: 
VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -4066,11 +4037,10 @@ STAGE PLANS: Map-reduce partition columns: UDFToDouble(_col0) (type: double) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [5] + keyColumns: 5:double keyExpressions: CastStringToDouble(col 3:string) -> 5:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -4119,11 +4089,10 @@ STAGE PLANS: Map-reduce partition columns: UDFToDouble(UDFToInteger((_col0 / 2.0D))) (type: double) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [3] + keyColumns: 3:double keyExpressions: CastLongToDouble(col 4:int)(children: CastDoubleToLong(col 3:double)(children: DoubleColDivideDoubleScalar(col 0:double, val 2.0) -> 3:double) -> 4:int) -> 3:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -4271,11 +4240,10 @@ STAGE PLANS: Map-reduce partition columns: (UDFToDouble(_col0) * 2.0D) (type: double) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [6] + keyColumns: 6:double keyExpressions: DoubleColMultiplyDoubleScalar(col 5:double, val 2.0)(children: CastStringToDouble(col 3:string) -> 5:double) -> 6:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -4324,10 +4292,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -4554,11 +4521,10 @@ STAGE PLANS: Map-reduce partition columns: UDFToString((UDFToDouble(_col0) * 2.0D)) (type: string) Reduce 
Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [7] + keyColumns: 7:string keyExpressions: CastDoubleToString(col 6:double)(children: DoubleColMultiplyDoubleScalar(col 5:double, val 2.0)(children: CastStringToDouble(col 3:string) -> 5:double) -> 6:double) -> 7:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -4607,11 +4573,10 @@ STAGE PLANS: Map-reduce partition columns: UDFToString(_col0) (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [4] + keyColumns: 4:string keyExpressions: CastDoubleToString(col 0:double) -> 4:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -4766,10 +4731,8 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -4821,10 +4784,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -4942,10 +4904,8 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -5018,10 +4978,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2, 3] + 
valueColumns: 2:string, 3:string Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) Execution mode: vectorized @@ -5069,10 +5028,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 2] + valueColumns: 0:string, 2:string Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col2 (type: string) Execution mode: vectorized @@ -5319,10 +5277,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 3] + keyColumns: 2:string, 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -5371,10 +5328,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col2 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 2] + keyColumns: 0:string, 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -5591,10 +5547,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -5643,10 +5598,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -5851,10 +5805,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - 
keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -5894,10 +5847,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -6095,10 +6047,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -6147,10 +6098,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -6413,10 +6363,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:string Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized @@ -6466,10 +6416,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE 
Execution mode: vectorized Map Vectorization: @@ -6517,10 +6466,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -6698,10 +6646,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -6749,10 +6696,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [3] + keyColumns: 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:string Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -6799,10 +6746,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -6976,10 +6922,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -7031,10 +6976,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for 
values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -7227,10 +7171,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -7281,10 +7224,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -7336,10 +7278,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -7473,10 +7414,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Reducer 7 Execution mode: vectorized @@ -7531,10 +7471,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -7613,10 +7552,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -7668,10 +7606,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -7864,10 +7801,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -7918,10 +7854,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -7973,10 +7908,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -8112,10 +8046,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Reducer 7 Execution mode: vectorized @@ -8170,10 +8103,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -8253,10 +8185,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -8308,10 +8239,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -8508,10 +8438,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -8562,10 +8491,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -8617,10 +8545,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -8674,10 +8601,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: 
[0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Vectorization: @@ -8754,10 +8680,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Reducer 9 Execution mode: vectorized @@ -8812,10 +8737,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -8965,10 +8889,13 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [2] + bigTableKeyColumns: 2:string + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -8988,10 +8915,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -9208,11 +9134,14 @@ STAGE PLANS: 0 day(_col0) (type: int) 1 day(_col0) (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [5] + bigTableKeyColumns: 5:int bigTableKeyExpressions: VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 5:int + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No 
nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -9232,10 +9161,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -9515,13 +9443,15 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [2] - bigTableRetainedColumnNums: [3] - bigTableValueColumnNums: [3] + bigTableKeyColumns: 2:string + bigTableRetainColumnNums: [3] + bigTableValueColumns: 3:string className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 1 Map 3 @@ -9533,10 +9463,13 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [3] + bigTableKeyColumns: 3:string + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 4 Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE @@ -9556,10 +9489,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -9801,10 +9733,13 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col2 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [2, 3] + bigTableKeyColumns: 2:string, 3:string + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -9824,10 +9759,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -10043,10 +9977,13 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [2] + bigTableKeyColumns: 2:string + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -10066,10 +10003,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -10277,11 +10213,14 @@ STAGE PLANS: 0 UDFToDouble(_col0) (type: double) 1 UDFToDouble(UDFToInteger((_col0 / 2.0D))) (type: double) Map Join Vectorization: - bigTableKeyColumnNums: [5] + bigTableKeyColumns: 5:double bigTableKeyExpressions: CastStringToDouble(col 3:string) -> 5:double + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -10301,10 +10240,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -10511,11 +10449,14 @@ STAGE PLANS: 0 (UDFToDouble(_col0) * 2.0D) (type: double) 1 _col0 (type: double) Map Join Vectorization: - bigTableKeyColumnNums: [6] + bigTableKeyColumns: 6:double bigTableKeyExpressions: DoubleColMultiplyDoubleScalar(col 5:double, val 2.0)(children: CastStringToDouble(col 3:string) -> 5:double) -> 6:double + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -10535,10 +10476,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -10687,10 +10627,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -10781,9 +10720,12 @@ STAGE PLANS: 0 1 Map Join Vectorization: + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Reducer 4 Statistics: Num rows: 500000 Data size: 11124000 Basic stats: COMPLETE Column stats: NONE @@ -10803,10 +10745,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -11017,10 +10958,13 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [2] + bigTableKeyColumns: 2:string + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -11040,10 +10984,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -11208,10 +11151,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [0] + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [] className: VectorMapJoinOuterStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -11231,10 +11176,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -11398,10 +11342,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [0] + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [] className: VectorMapJoinOuterStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED input vertices: 0 Map 1 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -11421,10 +11367,9 @@ 
STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -11684,13 +11629,15 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [2] - bigTableRetainedColumnNums: [3] - bigTableValueColumnNums: [3] + bigTableKeyColumns: 2:string + bigTableRetainColumnNums: [3] + bigTableValueColumns: 3:string className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 1 Map 3 @@ -11702,10 +11649,13 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [3] + bigTableKeyColumns: 3:string + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 4 Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE @@ -11725,10 +11675,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -11968,12 +11917,15 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col1 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [0] + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [] className: VectorMapJoinInnerStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [4] - smallTableMapping: [4] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 4:string + smallTableValueMapping: 4:string + hashTableImplementationType: OPTIMIZED 
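[Aside for readers of these golden-file updates: the new "hashTableImplementationType: OPTIMIZED" and "nonOuterSmallTableKeyMapping" annotations appear wherever the native vectorized MapJoin path is selected. A minimal sketch of how such a plan is produced is below; it is illustrative only, not part of the patch — the table names big_t/small_t are hypothetical, and the SET statements simply mirror the settings the "nativeConditionsMet" lines above check.]

-- Illustrative sketch, not part of the patch: big_t/small_t are hypothetical tables.
-- With these settings (the same ones listed under "nativeConditionsMet"), a map join
-- on a string key vectorizes natively and the plan reports
-- hashTableImplementationType: OPTIMIZED.
SET hive.auto.convert.join=true;
SET hive.mapjoin.optimized.hashtable=true;
SET hive.vectorized.execution.enabled=true;
SET hive.vectorized.execution.mapjoin.native.enabled=true;
EXPLAIN VECTORIZATION DETAIL
SELECT COUNT(*)
FROM big_t b
JOIN small_t s ON b.key = s.key;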
outputColumnNames: _col1 input vertices: 1 Map 3 @@ -11985,10 +11937,13 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [4] + bigTableKeyColumns: 4:string + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 4 Statistics: Num rows: 1 Data size: 207 Basic stats: PARTIAL Column stats: NONE @@ -12008,10 +11963,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -12145,10 +12099,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -12200,10 +12153,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -12396,10 +12348,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -12450,10 +12401,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT 
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -12505,10 +12455,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -12644,10 +12593,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Reducer 7 Execution mode: vectorized @@ -12702,10 +12650,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 diff --git ql/src/test/results/clientpositive/spark/subquery_scalar.q.out ql/src/test/results/clientpositive/spark/subquery_scalar.q.out index 7488f2e..34c4223 100644 --- ql/src/test/results/clientpositive/spark/subquery_scalar.q.out +++ ql/src/test/results/clientpositive/spark/subquery_scalar.q.out @@ -1477,7 +1477,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (100 < _col1) (type: boolean) + predicate: (_col1 > 100) (type: boolean) Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) @@ -2988,7 +2988,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col2 (type: int) 1 _col2 (type: int) @@ -3083,8 +3083,7 @@ POSTHOOK: Input: default@part 85768 86428 90681 -Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product -Warning: Shuffle Join JOIN[27][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select * from part_null where p_name NOT LIKE (select min(p_name) from part_null) AND p_brand NOT IN (select p_name from part) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part_null where 
p_name NOT LIKE (select min(p_name) from part_null) AND p_brand NOT IN (select p_name from part) @@ -3097,12 +3096,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 9 (GROUP, 2) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 10 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 5 (GROUP, 1) - Reducer 8 <- Map 7 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1), Reducer 7 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 4 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP, 1) + Reducer 9 <- Map 8 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -3118,7 +3116,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Map 5 + Map 4 Map Operator Tree: TableScan alias: part_null @@ -3136,7 +3134,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 7 + Map 6 Map Operator Tree: TableScan alias: part @@ -3154,7 +3152,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) - Map 9 + Map 8 Map Operator Tree: TableScan alias: part @@ -3173,61 +3171,28 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reducer 10 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean) - Reducer 2 + Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 32745 Basic stats: COMPLETE Column stats: NONE + 2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 32761 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (not (_col1 like _col9)) (type: boolean) - Statistics: Num rows: 1 Data size: 32745 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, 
_col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 32745 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 32745 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11 - Statistics: Num rows: 1 Data size: 32762 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 32761 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: string) sort order: + Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 1 Data size: 32762 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 32761 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: bigint), _col11 (type: bigint) - Reducer 4 + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -3238,7 +3203,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col13 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col10 = 0) or (_col13 is null and _col3 is not null and (_col11 >= _col10))) (type: boolean) + predicate: ((_col10 = 0L) or (_col13 is null and _col3 is not null and (_col11 >= _col10))) (type: boolean) Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) @@ -3251,7 +3216,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) @@ -3262,7 +3227,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 8 + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), count(VALUE._col1) @@ -3273,6 +3238,23 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 9 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num 
rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean) Stage: Stage-0 Fetch Operator @@ -3280,8 +3262,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product -Warning: Shuffle Join JOIN[27][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product PREHOOK: query: select * from part_null where p_name NOT LIKE (select min(p_name) from part_null) AND p_brand NOT IN (select p_name from part) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -3453,7 +3434,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (not CASE WHEN ((_col9 = 0)) THEN (false) WHEN (_col12 is not null) THEN (true) WHEN (_col3 is null) THEN (null) WHEN ((_col10 < _col9)) THEN (true) ELSE (false) END) (type: boolean) + predicate: (not CASE WHEN ((_col9 = 0L)) THEN (false) WHEN (_col12 is not null) THEN (true) WHEN (_col3 is null) THEN (null) WHEN ((_col10 < _col9)) THEN (true) ELSE (false) END) (type: boolean) Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) @@ -3469,7 +3450,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col4 (type: string) 1 _col2 (type: string) @@ -4127,7 +4108,7 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_type is not null) (type: boolean) + predicate: p_type is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_name (type: string), p_brand (type: string), p_type (type: string) @@ -4180,7 +4161,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col2 (type: string) 1 _col2 (type: string) @@ -4192,6 +4173,9 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 is not null and _col1 is not null) (type: boolean) Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string) @@ -6109,7 +6093,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (0.0 = _col1) (type: boolean) + predicate: (_col1 = 0.0D) (type: boolean) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int) @@ -6344,7 +6328,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (0 = _col1) (type: boolean) + predicate: (_col1 = 0) (type: boolean) Statistics: Num rows: 1 Data size: 
0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: _col0 (type: int) diff --git ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out index 98c709c..a9f5047 100644 --- ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out @@ -99,10 +99,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9] + valueColumns: 1:bigint, 2:decimal(20,10), 3:decimal(20,10), 4:decimal(30,10), 5:bigint, 6:decimal(23,14), 7:decimal(23,14), 8:decimal(33,14), 9:bigint Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)), _col9 (type: bigint) Execution mode: vectorized @@ -276,10 +276,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + valueColumns: 1:bigint, 2:decimal(20,10), 3:decimal(20,10), 4:decimal(30,10), 5:struct, 6:struct, 7:struct, 8:bigint, 9:decimal(23,14), 10:decimal(23,14), 11:decimal(33,14), 12:struct, 13:struct, 14:struct, 15:bigint Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) Execution mode: vectorized @@ -459,10 +459,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9] + valueColumns: 1:bigint, 2:decimal(11,5), 3:decimal(11,5), 4:decimal(21,5), 5:bigint, 6:decimal(16,0), 7:decimal(16,0), 8:decimal(26,0), 9:bigint Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: 
decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: bigint), _col6 (type: decimal(16,0)), _col7 (type: decimal(16,0)), _col8 (type: decimal(26,0)), _col9 (type: bigint) Execution mode: vectorized @@ -655,10 +655,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + valueColumns: 1:bigint, 2:decimal(11,5), 3:decimal(11,5), 4:decimal(21,5), 5:struct, 6:struct, 7:struct, 8:bigint, 9:decimal(16,0), 10:decimal(16,0), 11:decimal(26,0), 12:struct, 13:struct, 14:struct, 15:bigint Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(16,0)), _col10 (type: decimal(16,0)), _col11 (type: decimal(26,0)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vector_inner_join.q.out ql/src/test/results/clientpositive/spark/vector_inner_join.q.out index 168aa77..afe3b1f 100644 --- ql/src/test/results/clientpositive/spark/vector_inner_join.q.out +++ ql/src/test/results/clientpositive/spark/vector_inner_join.q.out @@ -135,13 +135,15 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 0 Map 1 @@ -315,13 +317,15 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 2 @@ -511,13 +515,15 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col1 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] 
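[Aside: the hunks above also show the renamed detail fields — "keyColumnNums: [0]" becomes "keyColumns: 0:int", and "valueColumnNums: [...]" becomes "valueColumns" with each column ordinal paired with its type. A hedged sketch of a query that exercises this output follows; the table and column names are hypothetical.]

-- Illustrative sketch, not part of the patch: t_dec(k int, d decimal(20,10)) is hypothetical.
-- Under EXPLAIN VECTORIZATION DETAIL, the reduce sink for this aggregation now prints,
-- e.g., "keyColumns: 0:int" and "valueColumns: 1:bigint, 2:decimal(30,10)" in place of
-- the bare ordinal lists shown on the "-" lines above.
SET hive.vectorized.execution.enabled=true;
EXPLAIN VECTORIZATION DETAIL
SELECT k, COUNT(*), SUM(d)
FROM t_dec
GROUP BY k;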
- bigTableRetainedColumnNums: [0] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [] className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3, 0] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [0] + projectedOutput: 3:string, 0:int + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1, _col2 input vertices: 1 Map 2 @@ -679,14 +685,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:int className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 1, 3] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:string, 1:int, 1:int, 3:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 @@ -840,14 +848,16 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col1 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 3, 0] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string, 3:string, 0:int + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 @@ -1010,14 +1020,16 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col1 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 3] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string, 3:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, 
_col1, _col2 input vertices: 1 Map 2 @@ -1179,14 +1191,16 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col1 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [1] + bigTableValueColumns: 1:string className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [1, 3, 0] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [0] + projectedOutput: 1:string, 3:string, 0:int + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1, _col2, _col3 input vertices: 1 Map 2 @@ -1348,14 +1362,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 3] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [1] + projectedOutput: 0:string, 1:int, 3:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col2, _col3 input vertices: 1 Map 2 @@ -1517,14 +1533,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:int className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 3] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:string, 1:int, 3:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col3 input vertices: 1 Map 2 diff --git ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out index ff1af2c..becf70d 100644 --- ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out @@ -203,6 +203,7 @@ STAGE PLANS: className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports 
Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col3 input vertices: 1 Map 3 @@ -217,6 +218,7 @@ STAGE PLANS: className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col3 input vertices: 1 Map 4 @@ -477,6 +479,7 @@ STAGE PLANS: className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col3, _col4 input vertices: 1 Map 3 @@ -491,6 +494,7 @@ STAGE PLANS: className: VectorMapJoinLeftSemiMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col3 input vertices: 1 Map 4 diff --git ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out index bc9d102..b770a49 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out @@ -151,15 +151,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableOuterKeyMapping: 1 -> 3 - bigTableRetainedColumnNums: [0, 1, 3] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:int className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 3, 4] - smallTableMapping: [4] + outerSmallTableKeyMapping: 1 -> 3 + projectedOutput: 0:string, 1:int, 3:int, 4:string + smallTableValueMapping: 4:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 @@ -304,15 +305,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableOuterKeyMapping: 0 -> 4 - bigTableRetainedColumnNums: [0, 1, 4] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS 
true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3, 4, 0, 1] - smallTableMapping: [3] + outerSmallTableKeyMapping: 0 -> 4 + projectedOutput: 3:string, 4:int, 0:int, 1:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 diff --git ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out index 9a1fa53..7f52b14 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out @@ -311,15 +311,16 @@ STAGE PLANS: 0 _col2 (type: int) 1 _col2 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [2] - bigTableOuterKeyMapping: 2 -> 15 - bigTableRetainedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15] - bigTableValueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + bigTableKeyColumns: 2:int + bigTableRetainColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + bigTableValueColumns: 0:tinyint, 1:smallint, 2:int, 3:bigint, 4:float, 5:double, 6:string, 7:string, 8:timestamp, 9:timestamp, 10:boolean, 11:boolean className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] - smallTableMapping: [13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24] + outerSmallTableKeyMapping: 2 -> 15 + projectedOutput: 0:tinyint, 1:smallint, 2:int, 3:bigint, 4:float, 5:double, 6:string, 7:string, 8:timestamp, 9:timestamp, 10:boolean, 11:boolean, 13:tinyint, 14:smallint, 15:int, 16:bigint, 17:float, 18:double, 19:string, 20:string, 21:timestamp, 22:timestamp, 23:boolean, 24:boolean + smallTableValueMapping: 13:tinyint, 14:smallint, 16:bigint, 17:float, 18:double, 19:string, 20:string, 21:timestamp, 22:timestamp, 23:boolean, 24:boolean + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 input vertices: 1 Map 2 @@ -487,13 +488,14 @@ STAGE PLANS: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 0:tinyint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:tinyint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + projectedOutput: 0:tinyint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 2 @@ -796,13 +798,14 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - 
bigTableKeyColumnNums: [2] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 2:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:tinyint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + projectedOutput: 0:tinyint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 3 @@ -814,13 +817,14 @@ STAGE PLANS: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 0:tinyint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:tinyint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + projectedOutput: 0:tinyint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 4 @@ -841,10 +845,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out index 32bcc9b..8776659 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out @@ -370,13 +370,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [2] - bigTableRetainedColumnNums: [3] - bigTableValueColumnNums: [3] + bigTableKeyColumns: 2:int + bigTableRetainColumnNums: [3] + bigTableValueColumns: 3:bigint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3] + projectedOutput: 3:bigint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 1 Map 3 @@ -388,13 +389,14 @@ STAGE PLANS: 0 _col1 (type: bigint) 1 _col0 (type: bigint) Map Join Vectorization: - bigTableKeyColumnNums: [3] - bigTableRetainedColumnNums: [3] - bigTableValueColumnNums: [3] + 
bigTableKeyColumns: 3:bigint + bigTableRetainColumnNums: [3] + bigTableValueColumns: 3:bigint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3] + projectedOutput: 3:bigint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 1 Map 4 @@ -415,10 +417,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_0.q.out ql/src/test/results/clientpositive/spark/vectorization_0.q.out index ec73876..01e77fc 100644 --- ql/src/test/results/clientpositive/spark/vectorization_0.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_0.q.out @@ -62,10 +62,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:tinyint, 1:tinyint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized @@ -117,10 +116,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:tinyint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -241,10 +240,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: 
vectorized @@ -296,10 +294,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized @@ -562,10 +559,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:bigint, 1:bigint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized @@ -617,10 +613,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:bigint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -741,10 +737,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -796,10 +791,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized @@ -1062,10 +1056,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - 
valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:float, 1:float, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized @@ -1117,10 +1110,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:float, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -1241,10 +1234,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:double Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) Execution mode: vectorized @@ -1296,10 +1288,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized @@ -1607,10 +1598,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] + valueColumns: 0:struct, 1:struct, 2:struct, 3:bigint, 4:double, 5:tinyint Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: bigint), _col4 (type: double), _col5 (type: tinyint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_1.q.out ql/src/test/results/clientpositive/spark/vectorization_1.q.out index a5d4a14..8f1ca80 100644 --- ql/src/test/results/clientpositive/spark/vectorization_1.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_1.q.out @@ -94,10 +94,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] + valueColumns: 0:struct, 1:double, 2:tinyint, 3:int, 4:struct, 5:bigint Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: tinyint), _col3 (type: int), _col4 (type: struct), _col5 (type: bigint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_12.q.out ql/src/test/results/clientpositive/spark/vectorization_12.q.out index 55e3ad6..f1460f0 100644 --- ql/src/test/results/clientpositive/spark/vectorization_12.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_12.q.out @@ -121,10 +121,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:double, 1:bigint, 2:string, 3:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [4, 5, 6, 7, 8] + valueColumns: 4:bigint, 5:struct, 6:struct, 7:bigint, 8:struct Statistics: Num rows: 3754 Data size: 807123 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: bigint), _col5 (type: struct), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_13.q.out ql/src/test/results/clientpositive/spark/vectorization_13.q.out index de501e7..655cf25 100644 --- ql/src/test/results/clientpositive/spark/vectorization_13.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_13.q.out @@ -123,10 +123,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3, 4] + keyColumns: 0:boolean, 1:tinyint, 2:timestamp, 3:float, 4:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5, 6, 7, 8, 9, 10] + valueColumns: 5:tinyint, 6:double, 7:struct, 8:struct, 9:float, 10:tinyint Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_14.q.out ql/src/test/results/clientpositive/spark/vectorization_14.q.out index b583cee..f2aed63 100644 --- ql/src/test/results/clientpositive/spark/vectorization_14.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_14.q.out @@ -124,10 +124,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3, 4] + 
keyColumns: 0:string, 1:float, 2:double, 3:timestamp, 4:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5, 6, 7, 8, 9, 10] + valueColumns: 5:struct, 6:float, 7:struct, 8:bigint, 9:struct, 10:struct Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_15.q.out ql/src/test/results/clientpositive/spark/vectorization_15.q.out index 70aacfc..2342ced 100644 --- ql/src/test/results/clientpositive/spark/vectorization_15.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_15.q.out @@ -119,10 +119,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3, 4, 5, 6] + keyColumns: 0:float, 1:boolean, 2:double, 3:string, 4:tinyint, 5:int, 6:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [7, 8, 9, 10, 11, 12] + valueColumns: 7:struct, 8:double, 9:struct, 10:struct, 11:struct, 12:struct Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: struct), _col8 (type: double), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_16.q.out ql/src/test/results/clientpositive/spark/vectorization_16.q.out index 61d1345..15df356 100644 --- ql/src/test/results/clientpositive/spark/vectorization_16.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_16.q.out @@ -96,10 +96,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:double, 1:string, 2:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5] + valueColumns: 3:bigint, 4:struct, 5:double Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_17.q.out ql/src/test/results/clientpositive/spark/vectorization_17.q.out index c333ab1..b2c5382 100644 --- ql/src/test/results/clientpositive/spark/vectorization_17.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_17.q.out @@ -90,10 +90,10 @@ STAGE PLANS: sort order: ++ 
Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3, 4] + keyColumns: 3:bigint, 4:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [6, 2, 8, 5, 15, 16, 14, 17, 19, 20, 22, 18] + valueColumns: 6:string, 2:int, 8:timestamp, 5:double, 15:double, 16:bigint, 14:double, 17:double, 19:double, 20:double, 22:decimal(11,4), 18:double Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_2.q.out ql/src/test/results/clientpositive/spark/vectorization_2.q.out index f1ee936..2aee6cd 100644 --- ql/src/test/results/clientpositive/spark/vectorization_2.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_2.q.out @@ -98,10 +98,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] + valueColumns: 0:struct, 1:double, 2:struct, 3:bigint, 4:tinyint, 5:struct Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: bigint), _col4 (type: tinyint), _col5 (type: struct) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_3.q.out ql/src/test/results/clientpositive/spark/vectorization_3.q.out index c78de72..249d4ed 100644 --- ql/src/test/results/clientpositive/spark/vectorization_3.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_3.q.out @@ -103,10 +103,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] + valueColumns: 0:struct, 1:struct, 2:struct, 3:double, 4:struct, 5:struct Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: struct) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_4.q.out ql/src/test/results/clientpositive/spark/vectorization_4.q.out index c924651..55d4ff6 100644 --- ql/src/test/results/clientpositive/spark/vectorization_4.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_4.q.out @@ -98,10 +98,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: 
VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4] + valueColumns: 0:bigint, 1:struct, 2:struct, 3:struct, 4:tinyint Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: tinyint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_5.q.out ql/src/test/results/clientpositive/spark/vectorization_5.q.out index 4cf4548..4494a1c 100644 --- ql/src/test/results/clientpositive/spark/vectorization_5.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_5.q.out @@ -92,10 +92,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4] + valueColumns: 0:smallint, 1:bigint, 2:smallint, 3:bigint, 4:tinyint Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: tinyint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_9.q.out ql/src/test/results/clientpositive/spark/vectorization_9.q.out index 61d1345..15df356 100644 --- ql/src/test/results/clientpositive/spark/vectorization_9.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_9.q.out @@ -96,10 +96,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:double, 1:string, 2:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5] + valueColumns: 3:bigint, 4:struct, 5:double Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out index c46fc03..60995b3 100644 --- ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out @@ -52,10 +52,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values 
IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorized_case.q.out ql/src/test/results/clientpositive/spark/vectorized_case.q.out index 58e295d..3d451b8 100644 --- ql/src/test/results/clientpositive/spark/vectorized_case.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_case.q.out @@ -312,10 +312,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized @@ -454,10 +453,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out index 93ab21e..dba53bc 100644 --- ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out @@ -100,6 +100,7 @@ STAGE PLANS: className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1 input vertices: 1 Map 3 diff --git ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index edc8f74..6b6cd40 100644 --- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -154,11 +154,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: 
NONE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized @@ -371,10 +371,10 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 5] + valueColumns: 1:string, 2:string, 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) Execution mode: vectorized @@ -414,10 +414,9 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -622,11 +621,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int) Execution mode: vectorized @@ -778,11 +777,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized @@ -993,11 +992,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num 
rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int) Execution mode: vectorized @@ -1211,11 +1210,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int) Execution mode: vectorized @@ -1431,11 +1430,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [0, 3, 4, 5, 6, 7, 8] + partitionColumns: 2:string + valueColumns: 0:int, 3:string, 4:string, 5:int, 6:string, 7:double, 8:string Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) Execution mode: vectorized @@ -1475,10 +1474,9 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -1654,10 +1652,9 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -1689,11 +1686,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - 
partitionColumnNums: [2] - valueColumnNums: [0, 3, 4, 5, 6, 7, 8] + partitionColumns: 2:string + valueColumns: 0:int, 3:string, 4:string, 5:int, 6:string, 7:double, 8:string Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) Execution mode: vectorized @@ -2264,11 +2261,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized @@ -2478,11 +2475,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized @@ -2761,11 +2758,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized @@ -2979,11 +2976,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [0, 5, 7] + partitionColumns: 2:string + valueColumns: 0:int, 5:int, 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), 
p_size (type: int), p_retailprice (type: double) Execution mode: vectorized @@ -3023,10 +3020,9 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -3271,11 +3267,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int) Execution mode: vectorized @@ -3506,11 +3502,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: double) Execution mode: vectorized @@ -3763,11 +3759,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized @@ -4218,10 +4214,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 5] + valueColumns: 1:string, 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: 
NONE value expressions: p_name (type: string), p_size (type: int) Execution mode: vectorized @@ -4531,10 +4527,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 5] + valueColumns: 1:string, 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_name (type: string), p_size (type: int) Execution mode: vectorized @@ -4839,10 +4835,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5] + valueColumns: 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int) Execution mode: vectorized @@ -5157,10 +5153,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5] + valueColumns: 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int) Execution mode: vectorized @@ -5484,10 +5480,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5] + valueColumns: 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int) Execution mode: vectorized @@ -5781,10 +5777,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5] + valueColumns: 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: 
p_size (type: int) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out index 8da42f4..2679048 100644 --- ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out +++ ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out @@ -353,9 +353,9 @@ Stage-3 Map 1 vectorized File Output Operator [FS_4] table:{"name:":"default.src_autho_test"} - Select Operator [SEL_3] (rows=500/500 width=178) + Select Operator [SEL_3] (rows=500/1 width=178) Output:["_col0","_col1"] - TableScan [TS_0] (rows=500/500 width=178) + TableScan [TS_0] (rows=500/1 width=178) default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] Stage-0 Move Operator @@ -612,15 +612,15 @@ Stage-0 Stage-1 Reducer 2 vectorized File Output Operator [FS_10] - Limit [LIM_9] (rows=5/5 width=178) + Limit [LIM_9] (rows=5/3 width=178) Number of rows:5 - Select Operator [SEL_8] (rows=500/5 width=178) + Select Operator [SEL_8] (rows=500/3 width=178) Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_7] - Select Operator [SEL_6] (rows=500/500 width=178) + Select Operator [SEL_6] (rows=500/1 width=178) Output:["_col0","_col1"] - TableScan [TS_0] (rows=500/500 width=178) + TableScan [TS_0] (rows=500/1 width=178) default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc @@ -679,7 +679,7 @@ Stage-3 Output:["_col0","_col1","_col2","_col3","_col4"] Filter Operator [FIL_6] (rows=1/3 width=352) predicate:(userid <= 13L) - TableScan [TS_0] (rows=1/15000 width=352) + TableScan [TS_0] (rows=1/15 width=352) default@orc_merge5,orc_merge5,Tbl:COMPLETE,Col:NONE,Output:["userid","string1","subtype","decimal1","ts"] PARTITION_ONLY_SHUFFLE [RS_7] Select Operator [SEL_6] (rows=1/3 width=352) @@ -847,23 +847,23 @@ Stage-0 Stage-1 Map 2 vectorized File Output Operator [FS_34] - Select Operator [SEL_33] (rows=391/480 width=186) + Select Operator [SEL_33] (rows=391/54 width=186) Output:["_col0","_col1","_col2"] - Map Join Operator [MAPJOIN_32] (rows=391/480 width=186) + Map Join Operator [MAPJOIN_32] (rows=391/54 width=186) BucketMapJoin:true,Conds:RS_29._col0=SEL_31._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3"] <-Map 1 [CUSTOM_EDGE] vectorized MULTICAST [RS_29] PartitionCols:_col0 - Select Operator [SEL_28] (rows=242/242 width=95) + Select Operator [SEL_28] (rows=242/4 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_27] (rows=242/242 width=95) + Filter Operator [FIL_27] (rows=242/4 width=95) predicate:key is not null - TableScan [TS_0] (rows=242/242 width=95) + TableScan [TS_0] (rows=242/4 width=95) default@tab,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_31] (rows=500/500 width=95) + <-Select Operator [SEL_31] (rows=500/4 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_30] (rows=500/500 width=95) + Filter Operator [FIL_30] (rows=500/4 width=95) predicate:key is not null - TableScan [TS_3] (rows=500/500 width=95) + TableScan [TS_3] (rows=500/4 width=95) default@tab_part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] diff --git ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out index fd2e95d..3cdd5b3 100644 --- ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out +++ ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out @@ 
-338,7 +338,7 @@ Stage-0 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_10] Map Join Operator [MAPJOIN_17] (rows=1501/10 width=215) - Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] + Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),DynamicPartitionHashJoin:true,HybridGraceHashJoin:true,Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] <-Map 4 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_7] PartitionCols:_col2 @@ -433,7 +433,7 @@ Stage-0 <-Reducer 2 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_10] Map Join Operator [MAPJOIN_18] (rows=1501/10 width=215) - Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true + Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),DynamicPartitionHashJoin:true,HybridGraceHashJoin:true <-Map 4 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_7] PartitionCols:_col0 @@ -531,7 +531,7 @@ Stage-0 SHUFFLE [RS_10] PartitionCols:_col0 Map Join Operator [MAPJOIN_20] (rows=1501/10 width=215) - Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true,Output:["_col0"] + Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),DynamicPartitionHashJoin:true,HybridGraceHashJoin:true,Output:["_col0"] <-Map 5 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_7] PartitionCols:_col0 diff --git ql/src/test/results/clientpositive/tez/vectorization_limit.q.out ql/src/test/results/clientpositive/tez/vectorization_limit.q.out index b8a1f90..cc9510b 100644 --- ql/src/test/results/clientpositive/tez/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/tez/vectorization_limit.q.out @@ -121,10 +121,10 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 5] + keyColumns: 0:tinyint, 5:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:smallint Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 value expressions: _col2 (type: smallint) @@ -278,11 +278,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [1] + partitionColumns: 0:tinyint + valueColumns: 1:struct Statistics: Num rows: 128 Data size: 10628 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: struct) @@ -439,10 +439,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Reduce Sink Vectorization: className: 
VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: vectorized @@ -596,11 +595,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:tinyint, 1:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [] + partitionColumns: 0:tinyint Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Map Vectorization: @@ -797,10 +795,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized @@ -854,10 +852,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:bigint, 0:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Reducer 3 diff --git ql/src/test/results/clientpositive/union_offcbo.q.out ql/src/test/results/clientpositive/union_offcbo.q.out index a723f00..51b56676 100644 --- ql/src/test/results/clientpositive/union_offcbo.q.out +++ ql/src/test/results/clientpositive/union_offcbo.q.out @@ -279,7 +279,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col8 (type: string) 1 _col8 (type: string) @@ -372,7 +372,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col8 (type: string) 1 _col8 (type: string) @@ -959,7 +959,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col8 (type: string) 1 _col8 (type: string) @@ -1063,7 +1063,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col8 (type: string) 1 _col8 (type: string) @@ -1310,7 +1310,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer 
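The vectorization_limit.q.out hunks above are a pure rename-and-enrich of the Reduce Sink Vectorization detail fields: keyColumnNums/valueColumnNums/partitionColumnNums become keyColumns/valueColumns/partitionColumns, and the bare column numbers now carry their types, e.g.:

  keyColumnNums: [0, 5]            (old: column numbers only)
  keyColumns: 0:tinyint, 5:double  (new: number:type pairs)

Empty valueColumnNums: [] lines are simply dropped rather than rendered as an empty list.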
Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col8 (type: string) 1 _col8 (type: string) @@ -1414,7 +1414,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col8 (type: string) 1 _col8 (type: string) diff --git ql/src/test/results/clientpositive/vector_left_outer_join2.q.out ql/src/test/results/clientpositive/vector_left_outer_join2.q.out index f1b5627..55d3632 100644 --- ql/src/test/results/clientpositive/vector_left_outer_join2.q.out +++ ql/src/test/results/clientpositive/vector_left_outer_join2.q.out @@ -65,6 +65,7 @@ POSTHOOK: Output: default@tjoin1 POSTHOOK: Lineage: tjoin1.c1 SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: tjoin1.c2 EXPRESSION [(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ] POSTHOOK: Lineage: tjoin1.rnum SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ] +_col0 _col1 _col2 PREHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE PREHOOK: type: QUERY PREHOOK: Input: default@tjoin2stage @@ -76,12 +77,14 @@ POSTHOOK: Output: default@tjoin2 POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] -PREHOOK: query: explain vectorization expression +tjoin2stage.rnum tjoin2stage.c1 tjoin2stage.c2 +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] @@ -166,15 +169,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] @@ -259,15 +264,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: 
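In the union_offcbo.q.out hunks above, only the condition-map label changes: a join previously printed as "Outer Join 0 to 1" is now printed as "Full Outer Join 0 to 1", while the keys (_col8 on both sides) stay the same. The label corresponds to a query shape like the following sketch; t1, t2 and the key k are hypothetical stand-ins, since the baseline only exposes the internal _col8 name:

SELECT *
FROM t1 FULL OUTER JOIN t2 ON (t1.k = t2.k);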
explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -309,6 +316,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -363,6 +371,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [string] Local Work: Map Reduce Local Work @@ -382,15 +396,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -432,6 +448,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -486,6 +503,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [string] Local Work: Map Reduce Local Work @@ -505,15 +528,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -555,6 +580,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 
1:c1:int, 2:c2:int, 3:ROW__ID:struct] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -609,6 +635,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [string] Local Work: Map Reduce Local Work @@ -628,15 +660,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -678,6 +712,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -732,6 +767,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [string] Local Work: Map Reduce Local Work @@ -751,6 +792,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL diff --git ql/src/test/results/clientpositive/vectorized_join46_mr.q.out ql/src/test/results/clientpositive/vectorized_join46_mr.q.out new file mode 100644 index 0000000..40c2430 --- /dev/null +++ ql/src/test/results/clientpositive/vectorized_join46_mr.q.out @@ -0,0 +1,2093 @@ +PREHOOK: query: CREATE TABLE test1 (key INT, value INT, col_1 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test1 +POSTHOOK: query: CREATE TABLE test1 (key INT, value INT, col_1 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test1 +PREHOOK: query: INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'), + (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test1 +POSTHOOK: query: INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'), + (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test1 +POSTHOOK: Lineage: test1.col_1 SCRIPT [] +POSTHOOK: Lineage: test1.key SCRIPT [] +POSTHOOK: Lineage: test1.value SCRIPT [] +col1 col2 col3 +PREHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: 
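The vector_left_outer_join2.q.out hunks switch the test from EXPLAIN VECTORIZATION EXPRESSION to EXPLAIN VECTORIZATION DETAIL, which is what introduces the vectorizationSchemaColumns line on each native TableScan and the rowBatchContext block (dataColumns, includeColumns, scratchColumnTypeNames) per map task; the added header rows such as "Explain" and "tjoin1.rnum tjoin1.c1 ..." come from the same baseline regeneration. The statement the test now runs, verbatim from the hunks:

EXPLAIN VECTORIZATION DETAIL
SELECT tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 AS c2j2
FROM tjoin1 LEFT OUTER JOIN tjoin2
  ON (tjoin1.c1 = tjoin2.c1 AND tjoin1.c2 > 15);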
Output: database:default +PREHOOK: Output: default@test2 +POSTHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test2 +PREHOOK: query: INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'), + (104, 3, 'Fli'), (105, NULL, 'None') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test2 +POSTHOOK: query: INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'), + (104, 3, 'Fli'), (105, NULL, 'None') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test2 +POSTHOOK: Lineage: test2.col_2 SCRIPT [] +POSTHOOK: Lineage: test2.key SCRIPT [] +POSTHOOK: Lineage: test2.value SCRIPT [] +col1 col2 col3 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:test2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:test2 + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + bigTableValueExpressions: col 0:int, col 1:int, col 2:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND test1.key between 100 and 102 + AND test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND test1.key between 100 and 102 + AND test2.key between 100 and 102) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:test2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:test2 + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key BETWEEN 100 AND 102 (type: boolean) + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + filter predicates: + 0 {_col0 BETWEEN 100 AND 102} + 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data 
size: 56 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + filter predicates: + 0 {_col0 BETWEEN 100 AND 102} + 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + bigTableValueExpressions: col 0:int, col 1:int, col 2:string + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND test1.key between 100 and 102 + AND test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND test1.key between 100 and 102 + AND test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat NULL NULL NULL +NULL NULL None NULL NULL NULL +Warning: Map Join MAPJOIN[11][bigTable=?] 
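Throughout the new vectorized_join46_mr.q.out baseline, every map join reports nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, so on MR these outer joins run through the non-native row-mode paths (VectorMapJoinOperator, or VectorMapJoinOuterFilteredOperator when the ON clause carries residual range predicates that survive as "filter predicates" on the big-table side). The query above that exercises the filtered variant, verbatim:

SELECT *
FROM test1 LEFT OUTER JOIN test2
ON (test1.value = test2.value
    AND test1.key BETWEEN 100 AND 102
    AND test2.key BETWEEN 100 AND 102);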
in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102 + AND test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102 + AND test2.key between 100 and 102) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:test2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:test2 + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key BETWEEN 100 AND 102 (type: boolean) + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + filter predicates: + 0 {_col0 BETWEEN 100 AND 102} + 1 + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + filter predicates: + 0 {_col0 BETWEEN 100 AND 102} + 1 + keys: + 0 + 1 + Map Join Vectorization: + bigTableValueExpressions: col 0:int, col 1:int, col 2:string + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Outer Join has keys IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false 
+ vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102 + AND test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102 + AND test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat NULL NULL NULL +NULL NULL None NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value AND true) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value AND true) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:test1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:test1 + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + bigTableValueExpressions: col 0:int, col 1:int, col 2:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value AND true) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value AND true) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +Warning: Map Join MAPJOIN[9][bigTable=?] 
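For the RIGHT OUTER JOIN case above, the constant-true conjunct in the ON clause is folded away during planning: the plan keeps the single equality key with no filter predicates, and the sides swap roles, test1 becoming the hashed local table and test2 the streamed big table. The statement, verbatim from the baseline:

SELECT *
FROM test1 RIGHT OUTER JOIN test2
ON (test1.value = test2.value AND true);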
in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:test2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:test2 + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + filter predicates: + 0 {_col0 BETWEEN 100 AND 102} + 1 + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + filter predicates: + 0 {_col0 BETWEEN 100 AND 102} + 1 + keys: + 0 + 1 + Map Join Vectorization: + bigTableValueExpressions: col 0:int, col 1:int, col 2:string + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Outer Join has keys IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] + Local 
Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat NULL NULL NULL +NULL NULL None NULL NULL NULL +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:test2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:test2 + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins 
not supported + vectorized: false + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:test2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:test2 + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: 
org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None NULL NULL NULL +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:test2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:test2 + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + 
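When the equality test is swallowed by a top-level OR, as in the queries above, nothing can serve as a join key: the keys lists go empty, the whole condition is evaluated after the join as residual filter predicates, the "is a cross product" warning appears, and the map join is not vectorized at all (notVectorizedReason: MAPJOIN operator: Non-equi joins not supported). When the OR is merely a conjunct beside the equality, the key survives and only the OR part becomes residual. One of the triggering statements, verbatim:

SELECT *
FROM test1 LEFT OUTER JOIN test2
ON (test1.value = test2.value
    OR test1.key BETWEEN 100 AND 102);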
inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:test2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:test2 + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: 
org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +NULL NULL None NULL NULL NULL +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:test1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:test1 + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: 
org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:test1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:test1 + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:test1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:test1 + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize 
IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None 102 2 Del +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:test1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:test1 + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: MAPJOIN operator: Non-equi joins not supported + vectorized: false + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic 
stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] 
IS false + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None NULL NULL NULL +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Map Vectorization: + enabled: false + enabledConditionsNotMet: 
Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None 102 2 Del +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 
(type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None NULL NULL NULL diff --git serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java index 22aadbb..9cfd8f2 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java +++ serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java @@ -599,8 +599,6 @@ public static Object deserializeReadComplexType(DeserializeRead deserializeRead, return getComplexField(deserializeRead, typeInfo); } - static int fake = 0; - private static Object getComplexField(DeserializeRead deserializeRead, TypeInfo typeInfo) throws IOException { switch (typeInfo.getCategory()) { diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java index 5e25c47..953604c 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java @@ -181,7 +181,9 @@ public void setVal(int elementNum, byte[] sourceBuf, int start, int length) { if ((nextFree + length) > buffer.length) { increaseBufferSpace(length); } - System.arraycopy(sourceBuf, start, buffer, nextFree, length); + if (length > 0) { + System.arraycopy(sourceBuf, start, buffer, nextFree, length); + } vector[elementNum] = buffer; 
this.start[elementNum] = nextFree; this.length[elementNum] = length; diff --git storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java index dcbba7a..3095114 100644 --- storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java +++ storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java @@ -38,8 +38,6 @@ private static int TEST_COUNT = 5000; - private static int fake = 0; - @Test public void testSaveAndRetrieve() throws Exception {
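
Editorial note (not part of the patch): the EXPLAIN plans above repeatedly show empty join keys together with a "residual filter predicates" entry and the warning that the MapJoin is a cross product. When the ON clause contains non-equi terms (OR, BETWEEN), Hive plans the join with whatever equality keys it can extract -- possibly none -- and evaluates the leftover predicate on each joined row; the "MAPJOIN operator: Non-equi joins not supported" line is also why these MapJoins stay unvectorized. The sketch below is illustrative only and is not Hive code; the class and method names (ResidualFilterSketch, crossJoinWithResidual) are hypothetical. It shows the shape of a keyless join that pairs every row and then applies a residual predicate, mirroring plans such as {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)}.

    import java.util.ArrayList;
    import java.util.List;
    import java.util.function.BiPredicate;

    // Illustrative sketch, not Hive's implementation.
    public class ResidualFilterSketch {
      static <L, R> List<Object[]> crossJoinWithResidual(
          List<L> left, List<R> right, BiPredicate<L, R> residual) {
        List<Object[]> out = new ArrayList<>();
        for (L l : left) {           // no equality key: every pair is formed,
          for (R r : right) {        // hence the "cross product" warning
            if (residual.test(l, r)) {  // residual filter predicate per row
              out.add(new Object[] { l, r });
            }
          }
        }
        return out;  // NULL padding for non-matching outer rows is omitted here
      }
    }

The OUTER semantics (emitting NULL-padded rows for unmatched inputs, as seen in the result tables above) are deliberately left out of the sketch to keep it short.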
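Editorial note (not part of the patch): the final hunks remove unused `static int fake` debug fields and guard the arraycopy in BytesColumnVector.setVal with `if (length > 0)`. A zero-length System.arraycopy is harmless when its arguments are valid, so the likely motivation -- an assumption on my part, the patch does not say -- is that callers may pass a null source buffer, or a start offset beyond sourceBuf.length, for empty values; in those cases System.arraycopy throws even though nothing would be copied. A minimal sketch of the guarded copy (the helper name copyVal is hypothetical):

    // Sketch of the guard added to BytesColumnVector.setVal.
    // Assumption: for length == 0, sourceBuf may be null or start may lie
    // past sourceBuf.length; skipping the copy avoids the
    // NullPointerException / IndexOutOfBoundsException arraycopy would raise.
    static void copyVal(byte[] sourceBuf, int start,
                        byte[] buffer, int nextFree, int length) {
      if (length > 0) {
        System.arraycopy(sourceBuf, start, buffer, nextFree, length);
      }
    }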