diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index fb926eb..a06d6a3 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1209,6 +1209,8 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "joins unnecessary memory will be allocated and then trimmed."), HIVEHYBRIDGRACEHASHJOINBLOOMFILTER("hive.mapjoin.hybridgrace.bloomfilter", true, "Whether to " + "use BloomFilter in Hybrid grace hash join to minimize unnecessary spilling."), + HIVEMAPJOINFULLOUER("hive.mapjoin.full.outer", true, + "Whether to use MapJoin for FULL OUTER JOINs."), HIVESMBJOINCACHEROWS("hive.smbjoin.cache.rows", 10000, "How many rows with the same key value should be cached in memory per smb joined table."), diff --git data/files/fullouter_long_big_1a.txt data/files/fullouter_long_big_1a.txt new file mode 100644 index 0000000..8cf831f --- /dev/null +++ data/files/fullouter_long_big_1a.txt @@ -0,0 +1,11 @@ +-5310365297525168078 +-6187919478609154811 +968819023021777205 +3313583664488247651 +-5206670856103795573 +\N +-6187919478609154811 +1569543799237464101 +-6187919478609154811 +-8460550397108077433 +-6187919478609154811 diff --git data/files/fullouter_long_big_1a_nonull.txt data/files/fullouter_long_big_1a_nonull.txt new file mode 100644 index 0000000..b2325ad --- /dev/null +++ data/files/fullouter_long_big_1a_nonull.txt @@ -0,0 +1,10 @@ +1569543799237464101 +-6187919478609154811 +968819023021777205 +-8460550397108077433 +-6187919478609154811 +-5310365297525168078 +-6187919478609154811 +-5206670856103795573 +3313583664488247651 +-6187919478609154811 diff --git data/files/fullouter_long_big_1b.txt data/files/fullouter_long_big_1b.txt new file mode 100644 index 0000000..87c2b3c --- /dev/null +++ data/files/fullouter_long_big_1b.txt @@ -0,0 +1,13 @@ +\N +31713 +31713 +31713 +31713 +32030 +31713 +-25394 +31713 +31713 +31713 +31713 +31713 diff --git data/files/fullouter_long_big_1c.txt data/files/fullouter_long_big_1c.txt new file mode 100644 index 0000000..2d13c26 --- /dev/null +++ data/files/fullouter_long_big_1c.txt @@ -0,0 +1,11 @@ +1928928239,\N +-1437463633,YYXPPCH +-1437463633,TKTKGVGFW +1725068083,MKSCCE +1928928239,\N +\N,ABBZ +1928928239,AMKTIWQ +-1437463633,JU +1928928239,VAQHVRI +-1437463633,SOWDWMS +-1437463633,\N diff --git data/files/fullouter_long_big_1d.txt data/files/fullouter_long_big_1d.txt new file mode 100644 index 0000000..4137f67 --- /dev/null +++ data/files/fullouter_long_big_1d.txt @@ -0,0 +1,12 @@ +-702028721 +-702028721 +-1780951928 +-670834064 +-814597051 +\N +-814597051 +-814597051 +-702028721 +-2038654700 +\N +-814597051 diff --git data/files/fullouter_long_small_1a.txt data/files/fullouter_long_small_1a.txt new file mode 100644 index 0000000..45d5825 --- /dev/null +++ data/files/fullouter_long_small_1a.txt @@ -0,0 +1,54 @@ +-1339636982994067311,2000-06-20 +-2575185053386712613,2105-01-21 +\N,2098-02-10 +-6784441713807772877,1845-02-16 +\N,2024-01-23 +-4224290881682877258,2185-07-08 +-614848861623872247,2101-05-25 +-2098090254092150988,2163-05-26 +434940853096155515,2275-02-08 +3873405809071478736,2034-06-09 +-2184423060953067642,1880-10-06 +7297177530102477725,1921-05-11 +7937120928560087303,2083-03-14 +\N,2242-02-08 +-2688622006344936758,2129-01-11 +214451696109242839,1977-01-04 +-4961171400048338491,2196-08-10 +4436884039838843341,2031-05-23 +2438535236662373438,1916-01-10 +6049335087268933751,2282-06-09 
+8755921538765428593,1827-05-01 +5252407779338300447,2039-03-10 +-2184423060953067642,1853-07-06 +7297177530102477725,1926-04-12 +-2098090254092150988,1817-03-12 +-5754527700632192146,1958-07-15 +-614848861623872247,2112-11-09 +5246983111579595707,1817-07-01 +-2098090254092150988,2219-12-23 +-5706981533666803767,2151-06-09 +7297177530102477725,2125-08-26 +-7707546703881534780,2134-08-20 +214451696109242839,2179-04-18 +3845554233155411208,1805-11-10 +3905351789241845882,2045-12-05 +2438535236662373438,2026-06-23 +-2688622006344936758,1948-10-15 +6049335087268933751,2086-12-17 +-2575185053386712613,1809-07-12 +-327698348664467755,2222-10-15 +-4224290881682877258,1813-05-17 +3873405809071478736,2164-04-23 +-5706981533666803767,1800-09-20 +214451696109242839,1855-05-12 +2438535236662373438,1881-09-16 +5252407779338300447,2042-04-26 +-3655445881497026796,2108-08-16 +3905351789241845882,1866-07-28 +-6784441713807772877,2054-06-17 +5246983111579595707,2260-05-11 +-1339636982994067311,2008-12-03 +3873405809071478736,1918-11-20 +-4224290881682877258,2120-01-16 +3845554233155411208,2264-04-05 diff --git data/files/fullouter_long_small_1a_nonull.txt data/files/fullouter_long_small_1a_nonull.txt new file mode 100644 index 0000000..bf94d5a --- /dev/null +++ data/files/fullouter_long_small_1a_nonull.txt @@ -0,0 +1,51 @@ +5246983111579595707,1817-07-01 +4436884039838843341,2031-05-23 +-4224290881682877258,1813-05-17 +-4961171400048338491,2196-08-10 +-2575185053386712613,2105-01-21 +5252407779338300447,2042-04-26 +-614848861623872247,2101-05-25 +-2098090254092150988,2163-05-26 +2438535236662373438,1881-09-16 +214451696109242839,2179-04-18 +2438535236662373438,2026-06-23 +-2184423060953067642,1853-07-06 +3873405809071478736,2164-04-23 +214451696109242839,1855-05-12 +-6784441713807772877,1845-02-16 +-2688622006344936758,1948-10-15 +7297177530102477725,1921-05-11 +-2575185053386712613,1809-07-12 +3905351789241845882,2045-12-05 +3845554233155411208,1805-11-10 +-3655445881497026796,2108-08-16 +3905351789241845882,1866-07-28 +-1339636982994067311,2008-12-03 +7297177530102477725,2125-08-26 +7297177530102477725,1926-04-12 +-5706981533666803767,1800-09-20 +6049335087268933751,2282-06-09 +3845554233155411208,2264-04-05 +8755921538765428593,1827-05-01 +-1339636982994067311,2000-06-20 +-2098090254092150988,1817-03-12 +3873405809071478736,2034-06-09 +2438535236662373438,1916-01-10 +5246983111579595707,2260-05-11 +-5706981533666803767,2151-06-09 +-614848861623872247,2112-11-09 +-327698348664467755,2222-10-15 +-2184423060953067642,1880-10-06 +434940853096155515,2275-02-08 +-4224290881682877258,2120-01-16 +-5754527700632192146,1958-07-15 +-4224290881682877258,2185-07-08 +-2098090254092150988,2219-12-23 +-7707546703881534780,2134-08-20 +214451696109242839,1977-01-04 +-2688622006344936758,2129-01-11 +7937120928560087303,2083-03-14 +-6784441713807772877,2054-06-17 +3873405809071478736,1918-11-20 +6049335087268933751,2086-12-17 +5252407779338300447,2039-03-10 diff --git data/files/fullouter_long_small_1b.txt data/files/fullouter_long_small_1b.txt new file mode 100644 index 0000000..7d45fe4 --- /dev/null +++ data/files/fullouter_long_small_1b.txt @@ -0,0 +1,72 @@ +2748,2298-06-20 21:01:24 +11232,2533-11-26 12:22:18 +\N,2124-05-07 15:01:19.021 +3198,2428-06-13 16:21:33.955 +-7624,2219-12-03 17:07:19 +24870,2752-12-26 12:32:23.03685163 +14865,2943-03-21 00:42:10.505 +-8624,2644-05-04 04:45:07.839 +-30059,2269-05-04 21:23:44.000339209 +14865,2079-10-06 16:54:35.117 +-8435,2834-12-06 16:38:18.901 +10553,2168-05-05 
21:10:59.000152113 +-8624,2282-03-28 07:58:16 +-15361,2219-09-15 20:15:03.000169887 +-14172,1918-09-13 11:44:24.496926711 +26484,1919-03-04 07:32:37.519 +-14172,2355-01-14 23:23:34 +-24775,2920-08-06 15:58:28.261059449 +-23117,2037-01-05 21:52:30.685952759 +17125,2236-07-14 01:54:40.927230276 +21181,2253-03-12 11:55:48.332 +-7373,2662-10-28 12:07:02.000526564 +-8087,2550-06-26 23:57:42.588007617 +29407,2385-12-14 06:03:39.597 +21181,2434-02-20 00:46:29.633 +-14172,2809-06-07 02:10:58 +13598,2421-05-20 14:18:31.000264698 +2748,2759-02-13 18:04:36.000307355 +-22422,1949-03-13 00:07:53.075 +26484,2953-03-10 02:05:26.508953676 +4510,2777-03-24 03:44:28.000169723 +-24775,2035-03-26 08:11:23.375224153 +-30059,2713-10-13 09:28:49 +-20517,2774-06-23 12:04:06.5 +11232,2038-04-06 14:53:59 +32030,2101-09-09 07:35:05.145 +-29600,2333-11-02 15:06:30 +-30306,2619-05-24 10:35:58.000774018 +-7624,2289-08-28 00:14:34 +-4279,2470-08-12 11:21:14.000955747 +-4279,2214-09-10 03:53:06 +-26998,2428-12-26 07:53:45.96925825 +17125,2629-11-15 15:34:52 +-8087,2923-07-02 11:40:26.115 +2632,2561-12-15 15:42:27 +21436,2696-05-08 05:19:24.112 +\N,2971-08-07 12:02:11.000948152 +-7624,2623-03-20 03:18:45.00006465 +-26998,2926-07-18 09:02:46.077 +11232,2507-01-27 22:04:22.49661421 +-30059,2420-12-10 22:12:30 +-15427,2355-01-08 12:34:11.617 +3198,2223-04-14 13:20:49 +-19167,2319-08-26 11:07:11.268 +14865,2220-02-28 03:41:36 +-20517,2233-12-20 04:06:56.666522799 +-15427,2046-06-07 22:58:40.728 +2748,2862-04-20 13:12:39.482805897 +-8435,2642-02-07 11:45:04.353231638 +-19167,2230-12-22 20:25:39.000242111 +-15427,2023-11-09 19:31:21 +13598,2909-06-25 23:22:50 +21436,2526-09-22 23:44:55 +-15361,2434-08-13 20:37:07.000172979 +4510,2293-01-17 13:47:41.00001006 +-8624,2120-02-15 15:36:40.000758423 +-22422,2337-07-19 06:33:02.000353352 +-26998,2268-08-04 12:48:11.848006292 +-22422,2982-12-28 06:30:26.000883228 +\N,2933-06-20 11:48:09.000839488 +3198,2736-12-20 03:59:50.343550301 +-20824,2478-11-05 00:28:05 diff --git data/files/fullouter_long_small_1c.txt data/files/fullouter_long_small_1c.txt new file mode 100644 index 0000000..ff323d3 --- /dev/null +++ data/files/fullouter_long_small_1c.txt @@ -0,0 +1,81 @@ +-1093006502,-69.55665828 +452719211,83003.43722 +1242586043,71.1485 +-934092157,-7843850349.57130038 +294598722,-3542.6 +284554389,5.727146 +90660785,12590.288613 +-99948814,-38076694.3981 +466567142,-9763217822.129028 +1909136587,-8610.078036935181 +1242586043,-4 +\N,1.089120893565337 +1039864870,987601.57 +-466171792,0 +-1681455031,-6.4543 +1755897735,-39.965207 +1585021913,745222.66808954 +448130683,-4302.485366846491 +193709887,0.8 +-424713789,0.48 +1585021913,607.22747 +-1250662632,5454127198.951479 +294598722,-9377326244.444 +193709887,-19889.83 +1039864870,0.7 +1242586043,-749975924224.63 +-1250662632,-544.554649 +-1740848088,-9.157 +-369457052,7.7 +-369457052,560.11907883090455 +90660785,-4564.517185 +466567142,-58810.60586 +466567142,196.5785295398584 +1738753776,1525.280459649262 +1816559437,-1035.7009 +-1490239076,92253.232096 +1039864870,94.04 +560745412,678.25 +-466171792,4227.5344 +1561921421,53050.55 +-99948814,-96386.438 +1519948464,152 +1719049112,-7888197 +-793950320,-16 +-466171792,69.9 +1738753776,-99817635066320.2416 +1091836730,0.02 +891262439,-0.04 +452719211,3020.2938930744636 +-2048404259,3939387044.1 +698032489,-330457.4292625839 +-1197550983,-0.5588796922 +-2123273881,-55.89198 +-2048404259,-0.3222960446251 +1585021913,-5762331.06697112 +1785750809,47443.115 +1909136587,181.07681535944 
+1801735854,-1760956929364.267 +\N,4.26165227 +1801735854,-438541294.7 +150678276,-8278 +1479580778,92077343080.7 +1091836730,-5017.14 +193709887,-0.5663 +-1681455031,-11105.372477 +-1250662632,93104 +-1197550983,0.1 +\N,682070836.2649603 +-1197550983,71852.8338674412613 +1561921421,-5.405 +-1740848088,0.506394259 +150678276,15989394.8436 +-793950320,-0.1 +-1740848088,901.441 +-477147437,6 +-1264372462,0.883 +-2123273881,3.959 +-1264372462,-6993985240226 +-1264372462,-899 +-243940373,-97176129669.654953 +-243940373,-583.258 diff --git data/files/fullouter_long_small_1d.txt data/files/fullouter_long_small_1d.txt new file mode 100644 index 0000000..9778d3f --- /dev/null +++ data/files/fullouter_long_small_1d.txt @@ -0,0 +1,39 @@ +533298451 +1164387380 +1614287784 +1635405412 +-1912571616 +-894799664 +-1210744742 +-1014271154 +-747044796 +-1003639073 +436878811 +-1323620496 +-1379355738 +-1712018127 +246169862 +1431997749 +670834064 +1780951928 +-707688773 +1997943409 +1372592319 +-932176731 +162858059 +-683339273 +-497171161 +699863556 +1685473722 +41376947 +-1036083124 +1825107160 +-2038654700 +2119085509 +260588085 +-1792852276 +1831520491 +103640700 +\N +699007128 +1840266070 diff --git data/files/fullouter_multikey_big_1a.txt data/files/fullouter_multikey_big_1a.txt new file mode 100644 index 0000000..fe38c7b --- /dev/null +++ data/files/fullouter_multikey_big_1a.txt @@ -0,0 +1,13 @@ +22767,-1969080993 +-17582,-1730236061 +3556,\N +-17582,1082230084 +-17582,827141667 +1499,371855128 +-17582,9637312 +\N,1082230084 +-6131,-1969080993 +3556,-1969080993 +\N,\N +-18222,-1969080993 +-17582,267529350 diff --git data/files/fullouter_multikey_big_1a_nonull.txt data/files/fullouter_multikey_big_1a_nonull.txt new file mode 100644 index 0000000..40e84b0 --- /dev/null +++ data/files/fullouter_multikey_big_1a_nonull.txt @@ -0,0 +1,10 @@ +-17582,1082230084 +22767,-1969080993 +-17582,827141667 +-17582,-1730236061 +3556,-1969080993 +-6131,-1969080993 +-18222,-1969080993 +1499,371855128 +-17582,267529350 +-17582,9637312 diff --git data/files/fullouter_multikey_big_1b.txt data/files/fullouter_multikey_big_1b.txt new file mode 100644 index 0000000..40cfb9a --- /dev/null +++ data/files/fullouter_multikey_big_1b.txt @@ -0,0 +1,17 @@ +2061-12-19 22:10:32.000628309,21635,ANCO +\N,21635,ANCO +2686-05-23 07:46:46.565832918,13212,NCYBDW +2082-07-14 04:00:40.695380469,12556,NCYBDW +2188-06-04 15:03:14.963259704,9468,AAA +2608-02-23 23:44:02.546440891,26184,NCYBDW +2093-04-10 23:36:54.846,\N,\N +2898-10-01 22:27:02.000871113,10361,NCYBDW +2306-06-21 11:02:00.143124239,1446,\N +\N,-6909,\N +\N,\N,\N +2306-06-21 11:02:00.143124239,-6909,NCYBDW +2093-04-10 23:36:54.846,1446,GHZVPWFO +\N,\N,CCWYD +2686-05-23 07:46:46.565832918,\N,GHZVPWFO +2093-04-10 23:36:54.846,28996,Q +2299-11-15 16:41:30.401,-31077,NCYBDW diff --git data/files/fullouter_multikey_small_1a.txt data/files/fullouter_multikey_small_1a.txt new file mode 100644 index 0000000..4e0742c --- /dev/null +++ data/files/fullouter_multikey_small_1a.txt @@ -0,0 +1,92 @@ +23015,258882280 +23015,-276888585 +21186,-586336015 +-22311,-2055239583 +3412,-1249487623 +\N,1082230084 +20156,-1618478138 +-17788,-738743861 +-24206,-1456409156 +30353,2044473567 +20969,-1995259010 +-23457,-63842445 +3412,-2081156563 +-6131,-1969080993 +23015,-252525791 +30353,1364268303 +23015,564751472 +15404,1078466156 +4586,-586336015 +-4117,-1386947816 +-26894,-63842445 +-17788,-1361776766 +-7386,-2112062470 +23015,-1893013623 +30353,1241923267 +-24206,641361618 +-28129,-2055239583 
+-20125,-1995259010 +16166,931172175 +31443,-1968665833 +-28313,837320573 +11460,1078466156 +15061,-63842445 +13672,-63842445 +14400,-825652334 +-7386,100736776 +26944,-1995259010 +-11868,97203778 +12089,-63842445 +-28137,-63842445 +3412,1253976194 +-980,2009785365 +16696,-63842445 +-11868,930596435 +4902,1078466156 +-17582,267529350 +-12252,964377504 +20156,963883665 +-11868,1658440922 +4779,-1995259010 +-7386,-1635102480 +-28313,51228026 +-11868,1052120431 +-980,-270600267 +-20900,1078466156 +\N,\N +20156,1165375499 +30353,-1507157031 +3412,-1196037018 +22934,-1695419330 +30353,105613996 +-17788,-872691214 +-980,-333603940 +30353,-1011627089 +-11868,-3536499 +-2407,1078466156 +23015,-217613200 +-28313,-706104224 +-980,712692345 +-11868,1456809245 +-17788,528419995 +-11868,-915441041 +-980,628784462 +30353,-1007182618 +23015,-696928205 +-980,356970043 +23015,-893234501 +-980,-465544127 +-5734,1078466156 +-980,-801821285 +26738,-2055239583 +8177,-1995259010 +-11868,1318114822 +3890,1411429004 +-6061,-586336015 +3412,-2132472060 +-15212,-2055239583 +-12252,1956403781 +5957,-1995259010 +-1787,-63842445 +20156,1855042153 +-980,1310479628 diff --git data/files/fullouter_multikey_small_1a_nonull.txt data/files/fullouter_multikey_small_1a_nonull.txt new file mode 100644 index 0000000..2a8b9a1 --- /dev/null +++ data/files/fullouter_multikey_small_1a_nonull.txt @@ -0,0 +1,90 @@ +16696,-63842445 +4586,-586336015 +26738,-2055239583 +-17788,-738743861 +-28313,-706104224 +-23457,-63842445 +-20900,1078466156 +-12252,964377504 +-28313,51228026 +-11868,-3536499 +11460,1078466156 +26944,-1995259010 +20156,1855042153 +-11868,97203778 +15061,-63842445 +-17788,528419995 +-26894,-63842445 +-28313,837320573 +20156,963883665 +-15212,-2055239583 +5957,-1995259010 +30353,-1011627089 +3890,1411429004 +-980,-333603940 +13672,-63842445 +-980,628784462 +23015,-252525791 +-11868,1052120431 +-980,356970043 +23015,-217613200 +-6061,-586336015 +-5734,1078466156 +-11868,1318114822 +23015,258882280 +-2407,1078466156 +12089,-63842445 +3412,-2132472060 +-28129,-2055239583 +-980,-270600267 +16166,931172175 +-7386,100736776 +4902,1078466156 +20969,-1995259010 +22934,-1695419330 +3412,-1249487623 +3412,1253976194 +21186,-586336015 +8177,-1995259010 +-7386,-1635102480 +-11868,1456809245 +-20125,-1995259010 +-980,-801821285 +-980,1310479628 +23015,564751472 +23015,-893234501 +4779,-1995259010 +-980,2009785365 +-24206,641361618 +30353,-1507157031 +14400,-825652334 +3412,-2081156563 +20156,-1618478138 +31443,-1968665833 +-22311,-2055239583 +30353,1241923267 +-11868,930596435 +-17788,-1361776766 +-24206,-1456409156 +-7386,-2112062470 +30353,1364268303 +23015,-1893013623 +-17788,-872691214 +30353,2044473567 +-28137,-63842445 +30353,105613996 +-6131,-1969080993 +-17582,267529350 +23015,-276888585 +-12252,1956403781 +23015,-696928205 +-11868,1658440922 +-1787,-63842445 +-11868,-915441041 +-980,-465544127 +30353,-1007182618 +-980,712692345 +20156,1165375499 +3412,-1196037018 +15404,1078466156 +-4117,-1386947816 diff --git data/files/fullouter_multikey_small_1b.txt data/files/fullouter_multikey_small_1b.txt new file mode 100644 index 0000000..b56a3f7 --- /dev/null +++ data/files/fullouter_multikey_small_1b.txt @@ -0,0 +1,118 @@ +2304-12-15 15:31:16,11101,YJCKKCR,-0.2 +2018-11-25 22:27:55.84,-12202,VBDBM,7506645.9537 +1957-03-06 09:57:31,-26373,NXLNNSO,2 +2332-06-14 07:02:42.32,-26373,XFFFDTQ,56845106806308.9 +2535-03-01 05:04:49.000525883,23663,ALIQKNXHE,-0.1665691 +2629-04-07 01:54:11,-6776,WGGFVFTW,6.8012851708 +2266-09-26 
06:27:29.000284762,20223,EDYJJN,14 +2969-01-23 14:08:04.000667259,-18138,VDPN,8924831210.42768019 +2861-05-27 07:13:01.000848622,-19598,WKPXNLXS,29399 +2301-06-03 17:16:19,15332,ZVEUKC,0.5 +1980-09-13 19:57:15,\N,M,57650.7723 +2304-12-15 15:31:16,1301,T,-0.8 +2461-03-09 09:54:45.000982385,-16454,ZSMB,-991.43605 +2044-05-02 07:00:03.35,-8751,ZSMB,-453797242.029791752 +2409-09-23 10:33:27,2638,XSXR,-9926693851 +1941-10-16 02:19:36.000423663,-24459,AO,-821445414.4579712 +2512-10-06 03:03:03,-3465,VZQ,-49.51219 +2971-02-14 09:13:19,-16605,BVACIRP,-5.751278023 +2075-10-25 20:32:40.000792874,\N,\N,226612651968.36076 +2073-03-21 15:32:57.617920888,26425,MPRACIRYW,5 +2969-01-23 14:08:04.000667259,14500,WXLTRFQP,-23.8198 +2898-12-18 03:37:17,-24459,MHNBXPBM,14.23669356238481 +\N,\N,\N,-2207.3 +2391-01-17 15:28:37.00045143,16160,ZVEUKC,771355639420297.133 +2309-01-15 12:43:49,22821,ZMY,40.9 +2340-12-15 05:15:17.133588982,23663,HHTP,33383.8 +2969-01-23 14:08:04.000667259,-8913,UIMQ,9.178 +2145-10-15 06:58:42.831,2638,\N,-9784.82 +2888-05-08 08:36:55.182302102,5786,ZVEUKC,-56082455.033918 +2467-05-11 06:04:13.426693647,23196,EIBSDASR,-8.5548883801 +2829-06-04 08:01:47.836,22771,ZVEUKC,94317.75318 +2938-12-21 23:35:59.498,29362,ZMY,0.88 +2304-12-15 15:31:16,-13125,JFYW,6.086657 +2808-07-09 02:10:11.928498854,-19598,FHFX,0.3 +2083-06-07 09:35:19.383,-26373,MR,-394.0867 +2686-05-23 07:46:46.565832918,13212,NCYBDW,-917116793.4 +2969-01-23 14:08:04.000667259,-8913,UIMQ,-375994644577.315257 +2338-02-12 09:30:07,20223,CTH,-6154.763054 +2629-04-07 01:54:11,-6776,WGGFVFTW,41.77451507786646 +2242-08-04 07:51:46.905,20223,UCYXACQ,37.7288 +2637-03-12 22:25:46.385,-12923,PPTJPFR,5.4 +2304-12-15 15:31:16,8650,RLNO,0.71351747335 +2688-02-06 20:58:42.000947837,20223,PAIY,67661.735 +\N,\N,\N,-2.4 +2512-10-06 03:03:03,-3465,VZQ,0.4458 +2960-04-12 07:03:42.000366651,20340,CYZYUNSF,-96.3 +2461-03-09 09:54:45.000982385,-16454,ZSMB,-9575827.55396 +2512-10-06 03:03:03,1560,X,-922.6951584107 +2396-04-06 15:39:02.404013577,29661,ZSMB,0.76718326 +2409-09-23 10:33:27,2638,XSXR,0.4 +2969-01-23 14:08:04.000667259,6689,TFGVOGPJF,-0.01 +2333-07-28 09:59:26,23196,RKSK,37872288434740893.5 +2409-09-23 10:33:27,2638,XSXR,-162.95 +2357-05-08 07:09:09.000482799,6226,ZSMB,-472 +2304-12-15 15:31:16,15090,G,-4319470286240016.3 +2304-12-15 15:31:16,1301,T,61.302 +2105-01-04 16:27:45,23100,ZSMB,-83.2328 +2242-08-04 07:51:46.905,20223,UCYXACQ,-0.26149 +2637-03-12 22:25:46.385,-17786,HYEGQ,-84.169614329419 +1931-12-04 11:13:47.269597392,23196,HVJCQMTQL,-9697532.8994 +2897-08-10 15:21:47.09,23663,XYUVBED,6370 +2888-05-08 08:36:55.182302102,5786,ZVEUKC,57.62175257788037 +2145-10-15 06:58:42.831,2638,UANGISEXR,-5996.306 +2462-12-16 23:11:32.633305644,-26373,CB,67.41799 +2396-04-06 15:39:02.404013577,29661,ZSMB,-5151598.347 +2304-12-15 15:31:16,15090,G,975 +2512-10-06 03:03:03,32099,ARNZ,-0.41 +2188-06-04 15:03:14.963259704,9468,AAA,2.75496352 +2512-10-06 03:03:03,1560,X,761196.522 +2304-12-15 15:31:16,1301,T,2720.8 +1919-06-20 00:16:50.611028595,20223,ZKBC,-23 +2897-08-10 15:21:47.09,23663,XYUVBED,51.7323303273 +2086-04-09 00:03:10,20223,THXNJGFFV,-85184687349898.892 +2238-05-17 19:27:25.519,20223,KQCM,-0.01095 +2086-04-09 00:03:10,20223,THXNJGFFV,482.5383411359219 +2480-10-02 09:31:37.000770961,-26373,NBN,-5875.5197252 +2086-04-09 00:03:10,20223,THXNJGFFV,0.4396861 +2759-11-26 22:19:55.410967136,-27454,ZMY,60.6025797 +2083-06-07 09:35:19.383,-26373,MR,67892053.02376094 +2882-05-20 07:21:25.221299462,23196,U,-9951044 +2971-02-14 
09:13:19,-16605,BVACIRP,-27394351.3 +2512-10-06 03:03:03,24313,QBHUG,-8423.151573236 +2882-05-20 07:21:25.221299462,23196,U,-4244.926206619 +1905-04-20 13:42:25.000469776,2638,KAUUFF,7 +2410-05-03 13:44:56,2638,PHOR,-769088.176482 +2668-06-25 07:12:37.000970744,2638,TJE,-2.7796827 +2969-01-23 14:08:04.000667259,-32485,AGEPWWLJF,-48431309405.652522 +2410-05-03 13:44:56,2638,PHOR,93262.914526611 +2512-10-06 03:03:03,13195,CRJ,14 +2018-11-25 22:27:55.84,-12202,VBDBM,98790.713907420831 +2304-12-15 15:31:16,8650,RLNO,-0.4355 +2071-07-21 20:02:32.000250697,2638,NRUV,-66198.351092 +2525-05-12 15:59:35,-24459,SAVRGA,53106747151.8633 +2637-03-12 22:25:46.385,21841,CXTI,749563668434009.65 +2018-11-25 22:27:55.84,-22419,LOTLS,342.3726040228584 +2637-03-12 22:25:46.385,21841,CXTI,7362887891522.3782 +2038-10-12 09:15:33.000539653,-19598,YKNIAJW,-642807895924.66 +2957-05-07 10:41:46,20223,OWQT,-586953.153681 +2304-12-15 15:31:16,11101,YJCKKCR,1279917802.42 +2355-09-23 19:52:34.638084141,-19598,H,92.15 +2960-04-12 07:03:42.000366651,20340,CYZYUNSF,2.1577659 +2355-09-23 19:52:34.638084141,-19598,H,74179461.880493 +2969-01-23 14:08:04.000667259,-8913,UIMQ,-81 +\N,-12914,ZVEUKC,221 +2743-12-27 05:16:19.000573579,-12914,ZVEUKC,-811984611.5178497 +1957-02-01 14:00:29.000548421,-16085,ZVEUKC,-2312.8149 +2201-07-05 17:22:06.084206844,-24459,UBGT,1.5069483282 +2461-03-09 09:54:45.000982385,-16454,ZSMB,8694.89 +2169-04-02 06:30:32,23855,PDVQATOS,-1515597428 +2304-12-15 15:31:16,30285,GSJPSIYOU,0.2 +2913-07-17 15:06:58.041,-10206,\N,-0.2 +2169-04-02 06:30:32,23855,PDVQATOS,-4016.9608 +2759-11-26 22:19:55.410967136,-27454,ZMY,368 +2073-03-21 15:32:57.617920888,26425,MPRACIRYW,726945733.4193 +2304-12-15 15:31:16,11101,YJCKKCR,-0.5 +2462-12-16 23:11:32.633305644,-26373,CB,-582687 +2357-05-08 07:09:09.000482799,6226,ZSMB,-32.46 +2304-12-15 15:31:16,12587,OPW,-4.59489504 diff --git data/files/fullouter_string_big_1a.txt data/files/fullouter_string_big_1a.txt new file mode 100644 index 0000000..1cbcd05 --- /dev/null +++ data/files/fullouter_string_big_1a.txt @@ -0,0 +1,13 @@ +FTWURVH +QNCYBDW +UA +WXHJ +\N +WXHJ +PXLD +WXHJ +PXLD +WXHJ +WXHJ +MXGDMBD +PXLD diff --git data/files/fullouter_string_big_1a_nonull.txt data/files/fullouter_string_big_1a_nonull.txt new file mode 100644 index 0000000..a6566f2 --- /dev/null +++ data/files/fullouter_string_big_1a_nonull.txt @@ -0,0 +1,12 @@ +WXHJ +WXHJ +FTWURVH +MXGDMBD +UA +WXHJ +QNCYBDW +PXLD +PXLD +WXHJ +PXLD +WXHJ diff --git data/files/fullouter_string_big_1a_old.txt data/files/fullouter_string_big_1a_old.txt new file mode 100644 index 0000000..1fa51ad --- /dev/null +++ data/files/fullouter_string_big_1a_old.txt @@ -0,0 +1,13 @@ +WXHJ +WXHJ +WXHJ +WXHJ +WXHJ +QNCYBDW +PXLD +PXLD +PXLD +UA +\N +FTWURVH +MXGDMBD diff --git data/files/fullouter_string_small_1a.txt data/files/fullouter_string_small_1a.txt new file mode 100644 index 0000000..f223da0 --- /dev/null +++ data/files/fullouter_string_small_1a.txt @@ -0,0 +1,38 @@ +BDBMW,2278-04-27,2101-02-21 08:53:34.692 +FROPIK,2023-02-28,2467-05-11 06:04:13.426693647 +GOYJHW,1976-03-06,2805-07-10 10:51:57.00083302 +MXGDMBD,1880-11-01,2765-10-06 13:28:17.000688592 +CQMTQLI,2031-09-13,1927-02-13 08:39:25.000919094 +,1985-01-22,2111-01-10 15:44:28 +IOQIDQBHU,2198-02-08,2073-03-21 15:32:57.617920888 +GSJPSIYOU,1948-07-17,2006-09-24 16:01:24.000239251 +\N,1865-11-08,2893-04-07 07:36:12 +BEP,2206-08-10,2331-10-09 10:59:51 +NADANUQMW,2037-10-19,2320-04-26 18:50:25.000426922 +\N,2250-04-22,2548-03-21 08:23:13.133573801 
+ATZJTPECF,1829-10-16,2357-05-08 07:09:09.000482799 +IWEZJHKE,\N,\N +AARNZRVZQ,2002-10-23,2525-05-12 15:59:35 +BEP,2141-02-19,2521-06-09 01:20:07.121 +AARNZRVZQ,2000-11-13,2309-06-05 19:54:13 +LOTLS,1957-11-09,2092-06-07 06:42:30.000538454 +FROPIK,2124-10-01,2974-07-06 12:05:08.000146048 +KL,1980-09-22,2073-08-25 11:51:10.318 +\N,1915-02-22,2554-10-27 09:34:30 +WNGFTTY,1843-06-10,2411-01-28 20:03:59 +VNRXWQ,1883-02-06,2287-07-17 16:46:58.287 +QTSRKSKB,2144-01-13,2627-12-20 03:38:53.000389266 +GOYJHW,1959-04-27,\N +LOTLS,2099-08-04,2181-01-25 01:04:25.000030055 +CQMTQLI,2090-11-13,2693-03-17 16:19:55.82 +VNRXWQ,2276-11-16,2072-08-16 17:45:47.48349887 +LOTLS,2126-09-16,1977-12-15 15:28:56 +FTWURVH,1976-03-10,2683-11-22 13:07:04.66673556 +,2021-02-21,2802-04-21 18:48:18.5933838 +ZNOUDCR,\N,1988-04-23 08:40:21 +FROPIK,2214-02-09,1949-08-18 17:14:38.000703738 +SDA,2196-04-12,2462-10-26 19:28:12.733 +WNGFTTY,2251-08-16,2649-12-21 18:30:42.498 +GOYJHW,1993-04-07,1950-05-04 09:28:22.000114784 +FYW,1807-03-20,2305-08-17 01:32:44 +ATZJTPECF,2217-10-22,2808-10-20 16:01:24.558 diff --git data/files/fullouter_string_small_1a_nonull.txt data/files/fullouter_string_small_1a_nonull.txt new file mode 100644 index 0000000..6b97ef4 --- /dev/null +++ data/files/fullouter_string_small_1a_nonull.txt @@ -0,0 +1,35 @@ +LOTLS,2126-09-16,1977-12-15 15:28:56 +MXGDMBD,1880-11-01,2765-10-06 13:28:17.000688592 +WNGFTTY,2251-08-16,2649-12-21 18:30:42.498 +QTSRKSKB,2144-01-13,2627-12-20 03:38:53.000389266 +AARNZRVZQ,2002-10-23,2525-05-12 15:59:35 +BEP,2141-02-19,2521-06-09 01:20:07.121 +ZNOUDCR,\N,1988-04-23 08:40:21 +FROPIK,2023-02-28,2467-05-11 06:04:13.426693647 +GOYJHW,1993-04-07,1950-05-04 09:28:22.000114784 +CQMTQLI,2090-11-13,2693-03-17 16:19:55.82 +BDBMW,2278-04-27,2101-02-21 08:53:34.692 +AARNZRVZQ,2000-11-13,2309-06-05 19:54:13 +FYW,1807-03-20,2305-08-17 01:32:44 +,2021-02-21,2802-04-21 18:48:18.5933838 +VNRXWQ,1883-02-06,2287-07-17 16:46:58.287 +FROPIK,2124-10-01,2974-07-06 12:05:08.000146048 +LOTLS,2099-08-04,2181-01-25 01:04:25.000030055 +BEP,2206-08-10,2331-10-09 10:59:51 +WNGFTTY,1843-06-10,2411-01-28 20:03:59 +LOTLS,1957-11-09,2092-06-07 06:42:30.000538454 +CQMTQLI,2031-09-13,1927-02-13 08:39:25.000919094 +GOYJHW,1976-03-06,2805-07-10 10:51:57.00083302 +,1985-01-22,2111-01-10 15:44:28 +SDA,2196-04-12,2462-10-26 19:28:12.733 +ATZJTPECF,1829-10-16,2357-05-08 07:09:09.000482799 +GOYJHW,1959-04-27,\N +FTWURVH,1976-03-10,2683-11-22 13:07:04.66673556 +KL,1980-09-22,2073-08-25 11:51:10.318 +ATZJTPECF,2217-10-22,2808-10-20 16:01:24.558 +NADANUQMW,2037-10-19,2320-04-26 18:50:25.000426922 +FROPIK,2214-02-09,1949-08-18 17:14:38.000703738 +IWEZJHKE,\N,\N +GSJPSIYOU,1948-07-17,2006-09-24 16:01:24.000239251 +IOQIDQBHU,2198-02-08,2073-03-21 15:32:57.617920888 +VNRXWQ,2276-11-16,2072-08-16 17:45:47.48349887 diff --git data/files/fullouter_string_small_1a_old.txt data/files/fullouter_string_small_1a_old.txt new file mode 100644 index 0000000..505c403 --- /dev/null +++ data/files/fullouter_string_small_1a_old.txt @@ -0,0 +1,38 @@ +,2021-02-21,2802-04-21 18:48:18.5933838 +,1985-01-22,2111-01-10 15:44:28 +VNRXWQ,1883-02-06,2287-07-17 16:46:58.287 +VNRXWQ,2276-11-16,2072-08-16 17:45:47.48349887 +KL,1980-09-22,2073-08-25 11:51:10.318 +FYW,1807-03-20,2305-08-17 01:32:44 +WNGFTTY,2251-08-16,2649-12-21 18:30:42.498 +WNGFTTY,1843-06-10,2411-01-28 20:03:59 +FTWURVH,1976-03-10,2683-11-22 13:07:04.66673556 +CQMTQLI,2031-09-13,1927-02-13 08:39:25.000919094 +CQMTQLI,2090-11-13,2693-03-17 16:19:55.82 +BEP,2141-02-19,2521-06-09 
01:20:07.121 +BEP,2206-08-10,2331-10-09 10:59:51 +FROPIK,2023-02-28,2467-05-11 06:04:13.426693647 +FROPIK,2214-02-09,1949-08-18 17:14:38.000703738 +FROPIK,2124-10-01,2974-07-06 12:05:08.000146048 +SDA,2196-04-12,2462-10-26 19:28:12.733 +ATZJTPECF,2217-10-22,2808-10-20 16:01:24.558 +ATZJTPECF,1829-10-16,2357-05-08 07:09:09.000482799 +MXGDMBD,1880-11-01,2765-10-06 13:28:17.000688592 +IWEZJHKE,\N,\N +NADANUQMW,2037-10-19,2320-04-26 18:50:25.000426922 +GOYJHW,1993-04-07,1950-05-04 09:28:22.000114784 +GOYJHW,1976-03-06,2805-07-10 10:51:57.00083302 +GOYJHW,1959-04-27,\N +QTSRKSKB,2144-01-13,2627-12-20 03:38:53.000389266 +IOQIDQBHU,2198-02-08,2073-03-21 15:32:57.617920888 +ZNOUDCR,\N,1988-04-23 08:40:21 +AARNZRVZQ,2000-11-13,2309-06-05 19:54:13 +AARNZRVZQ,2002-10-23,2525-05-12 15:59:35 +BDBMW,2278-04-27,2101-02-21 08:53:34.692 +\N,1865-11-08,2893-04-07 07:36:12 +\N,2250-04-22,2548-03-21 08:23:13.133573801 +\N,1915-02-22,2554-10-27 09:34:30 +LOTLS,1957-11-09,2092-06-07 06:42:30.000538454 +LOTLS,2126-09-16,1977-12-15 15:28:56 +LOTLS,2099-08-04,2181-01-25 01:04:25.000030055 +GSJPSIYOU,1948-07-17,2006-09-24 16:01:24.000239251 diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java index af446db..e24ee66 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java @@ -19,6 +19,11 @@ import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.tez.ObjectCache; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountVectorCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -26,6 +31,11 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestData; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.CreateMapJoinResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestData; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.MapJoinPlanVariation; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateUtil; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; @@ -75,7 +85,8 @@ public void bench() throws Exception { protected void setupMapJoin(HiveConf hiveConf, long seed, int rowCount, VectorMapJoinVariation vectorMapJoinVariation, MapJoinTestImplementation mapJoinImplementation, - String[] bigTableColumnNames, TypeInfo[] bigTableTypeInfos, int[] bigTableKeyColumnNums, + 
String[] bigTableColumnNames, TypeInfo[] bigTableTypeInfos, + int[] bigTableKeyColumnNums, String[] smallTableValueColumnNames, TypeInfo[] smallTableValueTypeInfos, int[] bigTableRetainColumnNums, int[] smallTableRetainKeyColumnNums, int[] smallTableRetainValueColumnNums, @@ -85,12 +96,12 @@ protected void setupMapJoin(HiveConf hiveConf, long seed, int rowCount, this.mapJoinImplementation = mapJoinImplementation; testDesc = new MapJoinTestDescription( hiveConf, vectorMapJoinVariation, - bigTableColumnNames, bigTableTypeInfos, + bigTableTypeInfos, bigTableKeyColumnNums, - smallTableValueColumnNames, smallTableValueTypeInfos, - bigTableRetainColumnNums, - smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, - smallTableGenerationParameters); + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + MapJoinPlanVariation.SHARED_SMALL_TABLE); // Prepare data. Good for ANY implementation variation. testData = new MapJoinTestData(rowCount, testDesc, seed, seed * 10); @@ -109,7 +120,7 @@ protected void setupMapJoin(HiveConf hiveConf, long seed, int rowCount, if (!isVectorOutput) { bigTableRows = VectorBatchGenerateUtil.generateRowObjectArray( - testDesc.bigTableKeyTypeInfos, testData.getBigTableBatchStream(), + testDesc.bigTableTypeInfos, testData.getBigTableBatchStream(), testData.getBigTableBatch(), testDesc.outputObjectInspectors); } else { @@ -140,10 +151,21 @@ protected static MapJoinOperator setupBenchmarkImplementation( Operator testCollectorOperator = (!isVectorOutput ? new CountCollectorTestOperator() : new CountVectorCollectorTestOperator()); - - MapJoinOperator operator = + + // UNDONE: We need to plumb down shareMapJoinTableContainer.... + CreateMapJoinResult createMapJoinResult = MapJoinTestConfig.createMapJoinImplementation( - mapJoinImplementation, testDesc, testCollectorOperator, testData, mapJoinDesc); + mapJoinImplementation, testDesc, testData, mapJoinDesc, + /* shareMapJoinTableContainer */ null); + MapJoinOperator operator = createMapJoinResult.mapJoinOperator; + MapJoinTableContainer mapJoinTableContainer = createMapJoinResult.mapJoinTableContainer; + + // Invoke initializeOp methods. + operator.initialize(testDesc.hiveConf, testDesc.inputObjectInspectors); + + // Fixup the mapJoinTables. 
+ operator.setTestMapJoinTableContainer(1, mapJoinTableContainer, null); + return operator; } diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java index c9da92a..aa88297 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java @@ -59,7 +59,8 @@ public void doSetup(VectorMapJoinVariation vectorMapJoinVariation, setupMapJoin(hiveConf, seed, rowCount, vectorMapJoinVariation, mapJoinImplementation, - bigTableColumnNames, bigTableTypeInfos, bigTableKeyColumnNums, + bigTableColumnNames, bigTableTypeInfos, + bigTableKeyColumnNums, smallTableValueColumnNames, smallTableValueTypeInfos, bigTableRetainColumnNums, smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java index a6b4719..60b2890 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java @@ -57,7 +57,8 @@ public void doSetup(VectorMapJoinVariation vectorMapJoinVariation, setupMapJoin(hiveConf, seed, rowCount, vectorMapJoinVariation, mapJoinImplementation, - bigTableColumnNames, bigTableTypeInfos, bigTableKeyColumnNums, + bigTableColumnNames, bigTableTypeInfos, + bigTableKeyColumnNums, smallTableValueColumnNames, smallTableValueTypeInfos, bigTableRetainColumnNums, smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java index 1b31038..937ede1 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java @@ -57,7 +57,8 @@ public void doSetup(VectorMapJoinVariation vectorMapJoinVariation, setupMapJoin(hiveConf, seed, rowCount, vectorMapJoinVariation, mapJoinImplementation, - bigTableColumnNames, bigTableTypeInfos, bigTableKeyColumnNums, + bigTableColumnNames, bigTableTypeInfos, + bigTableKeyColumnNums, smallTableValueColumnNames, smallTableValueTypeInfos, bigTableRetainColumnNums, smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java index f45a012..14ff8af 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java @@ -21,12 +21,14 @@ import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.concurrent.Future; import java.util.concurrent.locks.ReentrantLock; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; +import 
org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.conf.Constants; @@ -41,12 +43,16 @@ import org.apache.hadoop.hive.ql.exec.persistence.KeyValueContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer.KeyValueHelper; +import org.apache.hadoop.hive.ql.exec.persistence.AbstractRowContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinRowContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.ReusableGetAdaptorDirectAccess; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.persistence.ObjectContainer; import org.apache.hadoop.hive.ql.exec.persistence.UnwrapRowContainer; import org.apache.hadoop.hive.ql.exec.spark.SparkUtilities; @@ -66,7 +72,9 @@ import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; import org.apache.hive.common.util.ReflectionUtil; @@ -74,8 +82,8 @@ import org.slf4j.LoggerFactory; import com.google.common.annotations.VisibleForTesting; - import com.esotericsoftware.kryo.KryoException; +import com.google.common.base.Preconditions; /** * Map side Join operator implementation. @@ -105,6 +113,13 @@ protected HybridHashTableContainer firstSmallTable; // The first small table; // Only this table has spilled big table rows + protected transient boolean isFullOuterMapJoin; + protected transient boolean isFullOuterForwardKeysToIntersect; + protected transient boolean isFullOuterIntersect; + + protected transient int fullOuterBigTableRetainSize; + protected transient MatchTracker matchTracker; + protected transient boolean isTestingNoHashTableLoad; // Only used in bucket map join. private transient int numBuckets = -1; @@ -177,6 +192,8 @@ protected void initializeOp(Configuration hconf) throws HiveException { hybridMapJoinLeftover = false; firstSmallTable = null; + dpFullOuterMapJoinInit(); + generateMapMetaData(); isTestingNoHashTableLoad = HiveConf.getBoolVar(hconf, @@ -252,6 +269,28 @@ protected void completeInitializationOp(Object[] os) throws HiveException { } } + private void dpFullOuterMapJoinInit() { + + // This will be set during the first process call or during closeOp if no rows processed. 
+ matchTracker = null; + + isFullOuterMapJoin = (condn.length == 1 && condn[0].getType() == JoinDesc.FULL_OUTER_JOIN); + if (isFullOuterMapJoin) { + fullOuterBigTableRetainSize = conf.getRetainList().get(posBigTable).size(); + isFullOuterForwardKeysToIntersect = !conf.isDynamicPartitionHashJoin(); + if (isFullOuterForwardKeysToIntersect) { + + // The auxiliary forward sends first-time match keys to the FULL OUTER INTERCEPT MapJoin + // operator.. + auxiliaryChildIndex = 1; + } + isFullOuterIntersect = conf.isFullOuterIntersect(); + } else { + isFullOuterForwardKeysToIntersect = false; + isFullOuterIntersect = false; + } + } + @VisibleForTesting public void setTestMapJoinTableContainer(int posSmallTable, MapJoinTableContainer testMapJoinTableContainer, @@ -415,6 +454,27 @@ public void cleanUpInputFileChangedOp() throws HiveException { return dest.setFromRow(row, joinKeys[alias], joinKeysObjectInspectors[alias]); } + protected JoinUtil.JoinResult setMapJoinKeyNoNulls( + ReusableGetAdaptor dest, Object row, byte alias, MatchTracker matchTracker) + throws HiveException { + return dest.setFromRowNoNulls(row, joinKeys[alias], joinKeysObjectInspectors[alias], matchTracker); + } + + protected void setMapJoinKeyNoResult( + ReusableGetAdaptor dest, Object row, byte alias, MatchTracker matchTracker) + throws HiveException { + dest.setFromRowNoResult(row, joinKeys[alias], joinKeysObjectInspectors[alias], matchTracker); + } + + protected void forwardFirstTimeMatchToFullOuterIntersect( + Object firstTimeMatchRow, ObjectInspector outputOI) + throws HiveException { + Object standardFirstTimeMatchRow = + ObjectInspectorUtils.copyToStandardObject( + firstTimeMatchRow, inputObjInspectors[posBigTable], ObjectInspectorCopyOption.WRITABLE); + forwardAuxiliary(standardFirstTimeMatchRow, outputOI); + } + protected MapJoinKey getRefKey(byte alias) { // We assume that since we are joining on the same key, all tables would have either // optimized or non-optimized key; hence, we can pass any key in any table as reference. @@ -437,6 +497,10 @@ public void process(Object row, int tag) throws HiveException { for (byte pos = 0; pos < order.length; pos++) { if (pos != alias) { hashMapRowGetters[pos] = mapJoinTables[pos].createGetter(refKey); + if (isFullOuterMapJoin) { + matchTracker = + ((ReusableGetAdaptorDirectAccess) hashMapRowGetters[pos]).createMatchTracker(); + } } } } @@ -463,7 +527,22 @@ public void process(Object row, int tag) throws HiveException { ReusableGetAdaptor adaptor; if (firstSetKey == null) { adaptor = firstSetKey = hashMapRowGetters[pos]; - joinResult = setMapJoinKey(firstSetKey, row, alias); + if (!isFullOuterMapJoin) { + joinResult = setMapJoinKey(firstSetKey, row, alias); + } else if (!isFullOuterIntersect) { + // We do not match if key has any NULLs. + joinResult = setMapJoinKeyNoNulls(firstSetKey, row, alias, matchTracker); + if (isFullOuterForwardKeysToIntersect && + joinResult == JoinUtil.JoinResult.MATCH && + matchTracker.getIsFirstMatch()) { + forwardFirstTimeMatchToFullOuterIntersect(row, outputObjInspector); + } + } else { + // For FULL OUTER MapJoin Intersect, we just lookup the key. + // (NOTE: Keys with NULLs should not arrive here). + setMapJoinKeyNoResult(firstSetKey, row, alias, matchTracker); + return; + } } else { // Keys for all tables are the same, so only the first has to deserialize them. 
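// ---------------------------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the net behavior the FULL OUTER probe path above
// implements. During the probe, keys containing a NULL never match (setMapJoinKeyNoNulls), every
// successful lookup is recorded in a tracker (MatchTracker in the patch; a plain key set below),
// and a first-time match may additionally be forwarded to the FULL OUTER INTERSECT operator via
// the auxiliary child. After the probe, the small-table entries that were never matched are
// emitted with NULL big-table columns (see generateFullOuterSmallTableNoMatches below). All
// names in this sketch are local to it; it is a simplification, not the Hive implementation.
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

final class FullOuterProbeSketch {
  /** Big rows are {key, bigValue}; the small table maps key -> rows of {key, smallValue}. */
  static List<Object[]> fullOuterJoin(List<Object[]> bigRows, Map<Long, List<Object[]>> smallTable) {
    List<Object[]> result = new ArrayList<>();
    Set<Long> matched = new HashSet<>();                                  // stand-in for MatchTracker
    for (Object[] bigRow : bigRows) {
      Long key = (Long) bigRow[0];
      List<Object[]> hits = (key == null) ? null : smallTable.get(key);   // NULL keys never match
      if (hits == null) {
        result.add(new Object[] { bigRow[0], bigRow[1], null });          // big-table-only row
      } else {
        matched.add(key);
        for (Object[] smallRow : hits) {
          result.add(new Object[] { bigRow[0], bigRow[1], smallRow[1] }); // matched row
        }
      }
    }
    for (Map.Entry<Long, List<Object[]>> e : smallTable.entrySet()) {     // residual pass
      if (!matched.contains(e.getKey())) {
        for (Object[] smallRow : e.getValue()) {
          result.add(new Object[] { null, null, smallRow[1] });           // small-table-only row
        }
      }
    }
    return result;
  }
}
// ---------------------------------------------------------------------------------------------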
adaptor = hashMapRowGetters[pos]; @@ -544,8 +623,122 @@ protected void spillBigTableRow(MapJoinTableContainer hybridHtContainer, Object bigTable.add(row); } + protected void generateFullOuterSmallTableNoMatches() throws HiveException { + + // FUTURE: Currently, in the MapJoinOperaotr, we only support FULL OUTER MapJoin for + // FUTURE MapJoinBytesTableContainer. NOTE: Vectorization code will override this method. + + if (!conf.isDynamicPartitionHashJoin() && !conf.isFullOuterIntersect()) { + + // The FULL OUTER MapJoin INTERSECT operator does the non-match Small Table + // result work. + return; + } + + MapJoinBytesTableContainer smallTable = null; + byte smallTablePos = -1; + for (byte pos = 0; pos < mapJoinTables.length; pos++) { + if (pos != conf.getPosBigTable()) { + smallTable = (MapJoinBytesTableContainer) mapJoinTables[pos]; + smallTablePos = pos; + if (matchTracker == null) { + + // When the process method isn't called (i.e. no rows), then we need to create the + // MatchTracker here. + // + ReusableGetAdaptor hashMapRowGetter = smallTable.createGetter(null); + matchTracker = + ((ReusableGetAdaptorDirectAccess) hashMapRowGetter).createMatchTracker(); + } + break; + } + } + Preconditions.checkState(smallTablePos != -1); + + boolean isSmallTableValuesOnly = false; + int[] smallTableValuesIndex = conf.getValueIndex(smallTablePos); + if (smallTableValuesIndex == null) { + List valuesList = conf.getRetainList().get(smallTablePos); + smallTableValuesIndex = + ArrayUtils.toPrimitive(valuesList.toArray(new Integer[0])); + isSmallTableValuesOnly = true; + } + final int smallTableValuesIndexSize = smallTableValuesIndex.length; + + // Our first output column for Small Table results is based on order. (The Big Table columns + // will all be NULL). + final int firstOutputColumnNum = (posBigTable == (byte) 0 ? fullOuterBigTableRetainSize : 0); + + /* + * Create iterator that produces each non-matched Small Table key and a ReusableRowContainer + * the Small Table values. + */ + NonMatchedSmallTableIterator nonMatchedIterator = + smallTable.createNonMatchedSmallTableIterator(matchTracker); + int nonMatchedKeyCount = 0; + int nonMatchedValueCount = 0; + while (nonMatchedIterator.isNext()) { + List keyObjList = nonMatchedIterator.getCurrentKey(); + + MapJoinRowContainer values = nonMatchedIterator.getCurrentRows(); + AbstractRowContainer.RowIterator> iter = values.rowIter(); + for (List valueObjList = iter.first(); + valueObjList != null; + valueObjList = iter.next()) { + + // Form non-matched Small Table join result. We only fill in the Small Table columns, + // so the Big Table retained columns are NULLs from the new allocation. + + Object[] row = new Object[fullOuterBigTableRetainSize + smallTableValuesIndexSize]; + int outputColumnNum = firstOutputColumnNum; + + if (isSmallTableValuesOnly) { + for (int i = 0; i < smallTableValuesIndexSize; i++) { + row[outputColumnNum++] = valueObjList.get(smallTableValuesIndex[i]); + } + } else { + for (int i = 0; i < smallTableValuesIndexSize; i++) { + final int index = smallTableValuesIndex[i]; + + if (index >= 0) { + + // Zero and above numbers indicate a big table key is needed for + // small table result "area". + + row[outputColumnNum++] = keyObjList.get(index); + } else { + + // Negative numbers indicate a column to be (deserialize) read from the small table's + // LazyBinary value row. + + int smallTableValueIndex = -index - 1; + + row[outputColumnNum++] = valueObjList.get(smallTableValueIndex); + } + } + } + + // UNDONE: Do we need to copy the objects? 
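// ---------------------------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the sign convention of smallTableValuesIndex used
// above when assembling a non-matched Small Table row. A non-negative entry selects a column of
// the deserialized key; a negative entry selects column (-index - 1) of the LazyBinary value row;
// the Big Table positions of the output row are left NULL. The helper below is hypothetical and
// only restates that mapping over plain Lists.
import java.util.List;

final class SmallTableResultSketch {
  static Object[] assemble(List<Object> keyCols, List<Object> valueCols, int[] valuesIndex,
      int bigTableRetainSize, boolean bigTableColumnsFirst) {
    // Big Table slots stay NULL in the freshly allocated row; only the Small Table side is filled.
    Object[] row = new Object[bigTableRetainSize + valuesIndex.length];
    int out = bigTableColumnsFirst ? bigTableRetainSize : 0;
    for (int index : valuesIndex) {
      row[out++] = (index >= 0)
          ? keyCols.get(index)           // non-negative: column comes from the key
          : valueCols.get(-index - 1);   // negative: column comes from the value row
    }
    return row;
  }
}
// ---------------------------------------------------------------------------------------------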
+ Object standardCopyRow = + ObjectInspectorUtils.copyToStandardObject( + row, outputObjInspector, ObjectInspectorCopyOption.WRITABLE); + + internalForward(standardCopyRow, outputObjInspector); + nonMatchedValueCount++; + } + + nonMatchedKeyCount++; + } + } + @Override public void closeOp(boolean abort) throws HiveException { + + // FUTURE: Currently, we only support FULL OUTER MapJoin for single condition MapJoins. + if (isFullOuterMapJoin) { + generateFullOuterSmallTableNoMatches(); + } + boolean spilled = false; for (MapJoinTableContainer container : mapJoinTables) { if (container != null) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java index c28ef99..662f5c8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java @@ -242,6 +242,7 @@ public RowSchema getSchema() { // for output rows of this operator protected transient ObjectInspector outputObjInspector; + protected transient int auxiliaryChildIndex = -1; public void setId(String id) { this.id = id; @@ -914,51 +915,21 @@ protected long getNextCntr(long cntr) { protected void forward(Object row, ObjectInspector rowInspector) throws HiveException { - forward(row, rowInspector, false); - } - - protected void forward(VectorizedRowBatch vrg, ObjectInspector rowInspector) - throws HiveException { - forward(vrg, rowInspector, true); - } - - protected void forward(Object row, ObjectInspector rowInspector, boolean isVectorized) - throws HiveException { - if (isVectorized) { - vectorForward((VectorizedRowBatch) row, rowInspector); - } else { - baseForward(row, rowInspector); - } - } - - private void vectorForward(VectorizedRowBatch vrg, ObjectInspector rowInspector) - throws HiveException { - this.runTimeNumRows += vrg.count(); + runTimeNumRows++; if (getDone()) { return; } - // Data structures to store original values - final int size = vrg.size; - final boolean selectedInUse = vrg.selectedInUse; - final boolean saveState = (selectedInUse && multiChildren); - if (saveState) { - System.arraycopy(vrg.selected, 0, selected, 0, size); - } - int childrenDone = 0; for (int i = 0; i < childOperatorsArray.length; i++) { + if (i == auxiliaryChildIndex) { + continue; + } Operator o = childOperatorsArray[i]; if (o.getDone()) { childrenDone++; } else { - o.process(vrg, childOperatorsTag[i]); - // Restore original values - vrg.size = size; - vrg.selectedInUse = selectedInUse; - if (saveState) { - System.arraycopy(selected, 0, vrg.selected, 0, size); - } + o.process(row, childOperatorsTag[i]); } } @@ -968,27 +939,70 @@ private void vectorForward(VectorizedRowBatch vrg, ObjectInspector rowInspector) } } - private void baseForward(Object row, ObjectInspector rowInspector) + public void forwardAuxiliary(Object row, ObjectInspector rowInspector) throws HiveException { + + Operator auxiliaryChild = childOperatorsArray[auxiliaryChildIndex]; + if (auxiliaryChild.getDone()) { + return; + } + + auxiliaryChild.process(row, childOperatorsTag[auxiliaryChildIndex]); + } + + protected void vectorForward(VectorizedRowBatch batch) throws HiveException { - this.runTimeNumRows++; + + runTimeNumRows++; if (getDone()) { return; } - int childrenDone = 0; - for (int i = 0; i < childOperatorsArray.length; i++) { - Operator o = childOperatorsArray[i]; - if (o.getDone()) { - childrenDone++; - } else { - o.process(row, childOperatorsTag[i]); + // Data structures to store original values + final int size = batch.size; + final boolean 
selectedInUse = batch.selectedInUse; + final boolean saveState = (selectedInUse && multiChildren); + if (saveState) { + System.arraycopy(batch.selected, 0, selected, 0, size); + } + + final int childSize = childOperatorsArray.length; + if (childSize == 1) { + childOperatorsArray[0].process(batch, 0); + } else { + int childrenDone = 0; + for (int i = 0; i < childOperatorsArray.length; i++) { + if (i == auxiliaryChildIndex) { + continue; + } + Operator o = childOperatorsArray[i]; + if (o.getDone()) { + childrenDone++; + } else { + o.process(batch, 0); + + // Restore original values + batch.size = size; + batch.selectedInUse = selectedInUse; + if (saveState) { + System.arraycopy(selected, 0, batch.selected, 0, size); + } + } + } + // if all children are done, this operator is also done + if (childrenDone != 0 && childrenDone == childOperatorsArray.length) { + setDone(true); } } + } - // if all children are done, this operator is also done - if (childrenDone != 0 && childrenDone == childOperatorsArray.length) { - setDone(true); + public void vectorForwardAuxiliary(VectorizedRowBatch batch) throws HiveException { + + Operator auxiliaryChild = childOperatorsArray[auxiliaryChildIndex]; + if (auxiliaryChild.getDone()) { + return; } + + auxiliaryChild.process(batch, 0); } public void reset(){ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java index 0799181..ca04467 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java @@ -122,7 +122,11 @@ public void process(Object row, int tag) throws HiveException { if (conf != null && conf.isGatherStats()) { gatherStats(row); } - forward(row, inputObjInspectors[tag], vectorized); + if (vectorized) { + vectorForward((VectorizedRowBatch) row); + } else { + forward(row, inputObjInspectors[tag]); + } } private boolean checkSetDone(Object row, int tag) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java index add8bda..4670fb0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java @@ -466,16 +466,18 @@ public void put(KvSource kv, int keyHashCode) throws SerDeException { * @param key Key buffer. * @param offset the offset to the key in the buffer * @param hashMapResult The object to fill in that can read the values. + * @param matchTracker Opitional object for tracking key matches. * @return The state byte. */ - public byte getValueResult(byte[] key, int offset, int length, Result hashMapResult) { + public byte getValueResult(byte[] key, int offset, int length, Result hashMapResult, + MatchTracker matchTracker) { hashMapResult.forget(); WriteBuffers.Position readPos = hashMapResult.getReadPos(); // First, find first record for the key. 
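// ---------------------------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the slot-level bookkeeping behind the new
// MatchTracker parameter above. A successful lookup marks the key's hash slot (compare
// matchTracker.trackMatch(slot) in findKeyRefToRead later in this file's diff), and a residual
// scan walks the slot array returning only occupied, never-matched slots (compare
// findNextNonMatched). SimpleSlotTracker is a stand-in, not Hive's MatchTracker class.
final class SimpleSlotTracker {
  private final boolean[] matched;

  SimpleSlotTracker(int numHashBuckets) {
    matched = new boolean[numHashBuckets];
  }

  void trackMatch(int slot) {
    matched[slot] = true;
  }

  boolean wasMatched(int slot) {
    return matched[slot];
  }

  /** Returns the next occupied and unmatched slot at or after fromSlot, or -1 when exhausted. */
  int findNextNonMatched(long[] refs, int fromSlot) {
    for (int slot = fromSlot; slot < refs.length; slot++) {
      if (refs[slot] != 0 && !matched[slot]) {   // refs[slot] == 0 means the slot is empty
        return slot;
      }
    }
    return -1;
  }
}
// ---------------------------------------------------------------------------------------------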
- long ref = findKeyRefToRead(key, offset, length, readPos); + long ref = findKeyRefToRead(key, offset, length, readPos, matchTracker); if (ref == 0) { return 0; } @@ -490,6 +492,12 @@ public byte getValueResult(byte[] key, int offset, int length, Result hashMapRes return Ref.getStateByte(ref); } + public void lookupKeyNoResult(byte[] key, int offset, int length, WriteBuffers.Position readPos, + MatchTracker matchTracker) { + + findKeyRefToRead(key, offset, length, readPos, matchTracker); + } + /** * Take the segment reference from {@link #getValueRefs(byte[], int, List)} * result and makes it self-contained - adds byte array where the value is stored, and @@ -500,6 +508,53 @@ public void populateValue(WriteBuffers.ByteSegmentRef valueRef) { } /** + * Finds the next + * @param currentSlotNum Start by specifying -1; the return index from the previous call. + * @param keyRef If the return value is not -1, a reference to the key bytes. + * @param hashMapResult If the return value is not -1, the key's values. + * @param matchTracker The object that tracks matches (non-shared). + * @return The current index of the non-matched key; or -1 if no more. + */ + public int findNextNonMatched(int currentSlotNum, WriteBuffers.ByteSegmentRef keyRef, + Result hashMapResult, MatchTracker matchTracker) { + currentSlotNum++; + + hashMapResult.forget(); + + WriteBuffers.Position readPos = hashMapResult.getReadPos(); + + while (true) { + if (currentSlotNum >= refs.length) { + + // No more. + return -1; + } + long ref = refs[currentSlotNum]; + if (ref != 0 && !matchTracker.wasMatched(currentSlotNum)) { + + // An unmatched key. + writeBuffers.setReadPoint(getFirstRecordLengthsOffset(ref, readPos), readPos); + int valueLength = (int) writeBuffers.readVLong(readPos); + int keyLength = (int) writeBuffers.readVLong(readPos); + long keyOffset = Ref.getOffset(ref) - (valueLength + keyLength); + + keyRef.reset(keyOffset, keyLength); + if (keyLength > 0) { + writeBuffers.populateValue(keyRef); + } + + boolean hasList = Ref.hasList(ref); + long offsetAfterListRecordKeyLen = hasList ? writeBuffers.getReadPoint(readPos) : 0; + + hashMapResult.set(this, Ref.getOffset(ref), hasList, offsetAfterListRecordKeyLen); + + return currentSlotNum; + } + currentSlotNum++; + } + } + + /** * Number of keys in the hashmap * @return number of keys */ @@ -516,6 +571,10 @@ public int getNumValues() { return numValues; } + public int getNumHashBuckets() { + return refs.length; + } + /** * Number of bytes used by the hashmap * There are two main components that take most memory: writeBuffers and refs @@ -614,7 +673,7 @@ private int findKeySlotToWrite(long keyOffset, int keyLength, int hashCode) { * @return The ref to use for reading. */ private long findKeyRefToRead(byte[] key, int offset, int length, - WriteBuffers.Position readPos) { + WriteBuffers.Position readPos, MatchTracker matchTracker) { final int bucketMask = (refs.length - 1); int hashCode = writeBuffers.hashCode(key, offset, length); int slot = hashCode & bucketMask; @@ -629,6 +688,13 @@ private long findKeyRefToRead(byte[] key, int offset, int length, return 0; } if (isSameKey(key, offset, length, ref, hashCode, readPos)) { + + if (matchTracker != null) { + + // It will only update memory when not set. 
+ matchTracker.trackMatch(slot); + } + return ref; } ++metricGetConflict; @@ -897,7 +963,7 @@ public void debugDumpTable() { dump.append(Utils.toStringBinary(key, 0, key.length)).append(" ref [").append(dumpRef(ref)) .append("]: "); Result hashMapResult = new Result(); - getValueResult(key, 0, key.length, hashMapResult); + getValueResult(key, 0, key.length, hashMapResult, null); List results = new ArrayList(); WriteBuffers.ByteSegmentRef byteSegmentRef = hashMapResult.first(); while (byteSegmentRef != null) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java index 9d35805..74ff48f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapper; import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapperBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; @@ -113,6 +114,7 @@ public void put(MapJoinKey key, MapJoinRowContainer value) { public int size() { return mHash.size(); } + @Override public Set> entrySet() { return mHash.entrySet(); @@ -141,6 +143,12 @@ public ReusableGetAdaptor createGetter(MapJoinKey keyTypeFromLoader) { } @Override + public NonMatchedSmallTableIterator createNonMatchedSmallTableIterator( + MatchTracker matchTracker) { + throw new RuntimeException("Not applicable"); + } + + @Override public long getEstimatedMemorySize() { // TODO: Key and Values are Object[] which can be eagerly deserialized or lazily deserialized. 
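/*
 * [Illustrative aside, not part of the patch] The matchTracker.trackMatch(slot) call above
 * records which hash-table slots the big table actually hit, one bit per logical bucket,
 * packed 64 to a long. A minimal sketch of that bookkeeping; returning whether the bit was
 * previously clear is one natural way to expose a "first match" signal:
 */
public class BitFlagTrackerSketch {

  private final long[] flags;

  public BitFlagTrackerSketch(int bucketCount) {
    flags = new long[(bucketCount + Long.SIZE - 1) / Long.SIZE];
  }

  /** @return true if this is the first time the slot is marked as matched. */
  public boolean trackMatch(int slot) {
    final int word = slot / Long.SIZE;
    final long mask = 1L << (slot % Long.SIZE);
    final boolean firstMatch = (flags[word] & mask) == 0;
    flags[word] |= mask;
    return firstMatch;
  }

  public boolean wasMatched(int slot) {
    return (flags[slot / Long.SIZE] & (1L << (slot % Long.SIZE))) != 0;
  }

  public static void main(String[] args) {
    BitFlagTrackerSketch tracker = new BitFlagTrackerSketch(1 << 20);
    System.out.println(tracker.trackMatch(70));   // true: first match for bucket 70
    System.out.println(tracker.trackMatch(70));   // false: bucket 70 already matched
    System.out.println(tracker.wasMatched(71));   // false
  }
}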
To accurately // estimate the entry size, every possible Objects in Key, Value should implement MemoryEstimate interface which @@ -188,6 +196,22 @@ public GetAdaptor(MapJoinKey key) { } @Override + public JoinUtil.JoinResult setFromVectorNoNulls(VectorHashKeyWrapper kw, + VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, + MatchTracker matchTracer) + throws HiveException { + throw new RuntimeException("Not supported"); + } + + @Override + public void setFromVectorNoResult(VectorHashKeyWrapper kw, + VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, + MatchTracker matchTracer) + throws HiveException { + throw new RuntimeException("Not supported"); + } + + @Override public JoinUtil.JoinResult setFromRow(Object row, List fields, List ois) throws HiveException { if (currentKey == null) { @@ -208,6 +232,18 @@ public GetAdaptor(MapJoinKey key) { } @Override + public JoinUtil.JoinResult setFromRowNoNulls(Object row, List fields, + List ois, MatchTracker matchTracker) throws HiveException { + throw new RuntimeException("Not supported"); + } + + @Override + public void setFromRowNoResult(Object row, List fields, + List ois, MatchTracker matchTracker) throws HiveException { + throw new RuntimeException("Not supported"); + } + + @Override public JoinUtil.JoinResult setFromOther(ReusableGetAdaptor other) { assert other instanceof GetAdaptor; GetAdaptor other2 = (GetAdaptor)other; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java index 027e39a..d9f75a5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer.KeyValueHelper; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapper; import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapperBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; @@ -50,6 +51,7 @@ import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.Position; import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe; import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryFactory; @@ -776,6 +778,12 @@ public ReusableGetAdaptor createGetter(MapJoinKey keyTypeFromLoader) { } @Override + public NonMatchedSmallTableIterator createNonMatchedSmallTableIterator( + MatchTracker matchTracker) { + throw new RuntimeException("Not implemented"); + } + + @Override public void seal() { for (HashPartition hp : hashPartitions) { // Only seal those partitions that haven't been spilled and cleared, @@ -835,6 +843,22 @@ public GetAdaptor() { } @Override + public JoinUtil.JoinResult setFromVectorNoNulls(VectorHashKeyWrapper kw, + VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, + MatchTracker matchTracer) + throws HiveException { + throw new RuntimeException("Not 
supported"); + } + + @Override + public void setFromVectorNoResult(VectorHashKeyWrapper kw, + VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, + MatchTracker matchTracer) + throws HiveException { + throw new RuntimeException("Not supported"); + } + + @Override public JoinUtil.JoinResult setFromRow(Object row, List fields, List ois) throws HiveException { if (nulls == null) { @@ -851,6 +875,18 @@ public GetAdaptor() { } @Override + public JoinUtil.JoinResult setFromRowNoNulls(Object row, List fields, + List ois, MatchTracker matchTracker) throws HiveException { + throw new RuntimeException("Not supported"); + } + + @Override + public void setFromRowNoResult(Object row, List fields, + List ois, MatchTracker matchTracker) throws HiveException { + throw new RuntimeException("Not supported"); + } + + @Override public JoinUtil.JoinResult setFromOther(ReusableGetAdaptor other) throws HiveException { assert other instanceof GetAdaptor; GetAdaptor other2 = (GetAdaptor)other; @@ -884,7 +920,9 @@ public MapJoinRowContainer getCurrentRows() { @Override public JoinUtil.JoinResult setDirect(byte[] bytes, int offset, int length, - BytesBytesMultiHashMap.Result hashMapResult) { + BytesBytesMultiHashMap.Result hashMapResult, MatchTracker matchTracker) { + + // UNDONE: matchTracker return currentValue.setDirect(bytes, offset, length, hashMapResult); } @@ -892,6 +930,17 @@ public MapJoinRowContainer getCurrentRows() { public int directSpillPartitionId() { return currentValue.directSpillPartitionId(); } + + @Override + public void setDirectNoResult(byte[] bytes, int offset, int length, + WriteBuffers.Position readPos, MatchTracker matchTracker) { + throw new RuntimeException("Not implemented"); + } + + @Override + public MatchTracker createMatchTracker() { + throw new RuntimeException("Not implemented"); + } } /** Row container that gets and deserializes the rows on demand from bytes provided. 
*/ @@ -966,7 +1015,7 @@ public ReusableRowContainer() { } else { aliasFilter = hashPartitions[partitionId].hashMap.getValueResult(output.getData(), 0, - output.getLength(), hashMapResult); + output.getLength(), hashMapResult, null); dummyRow = null; if (hashMapResult.hasRows()) { return JoinUtil.JoinResult.MATCH; @@ -977,6 +1026,10 @@ public ReusableRowContainer() { } } + public void reset() { + hashMapResult.forget(); + } + @Override public boolean hasRows() { return hashMapResult.hasRows() || (dummyRow != null); @@ -1116,7 +1169,7 @@ public void write(MapJoinObjectSerDeContext valueContext, ObjectOutputStream out } else { aliasFilter = hashPartitions[partitionId].hashMap.getValueResult(bytes, offset, length, - hashMapResult); + hashMapResult, null); dummyRow = null; if (hashMapResult.hasRows()) { return JoinUtil.JoinResult.MATCH; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java index 033bbdb..6bacf51 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapper; import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapperBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; @@ -40,6 +41,8 @@ import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; +import org.apache.hadoop.hive.serde2.WriteBuffers.Position; import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; @@ -88,6 +91,7 @@ * compare the large table keys correctly when we do, we need to serialize them with correct * ordering. Hence, remember the ordering here; it is null if we do use LazyBinarySerDe. 
*/ + private AbstractSerDe keySerde; private boolean[] sortableSortOrders; private byte[] nullMarkers; private byte[] notNullMarkers; @@ -407,7 +411,8 @@ public long getEstimatedMemorySize() { @Override public void setSerde(MapJoinObjectSerDeContext keyContext, MapJoinObjectSerDeContext valueContext) throws SerDeException { - AbstractSerDe keySerde = keyContext.getSerDe(), valSerde = valueContext.getSerDe(); + keySerde = keyContext.getSerDe(); + AbstractSerDe valSerde = valueContext.getSerDe(); if (writeHelper == null) { LOG.info("Initializing container with " + keySerde.getClass().getName() + " and " + valSerde.getClass().getName()); @@ -456,6 +461,12 @@ public ReusableGetAdaptor createGetter(MapJoinKey keyTypeFromLoader) { } @Override + public NonMatchedSmallTableIterator createNonMatchedSmallTableIterator( + MatchTracker matchTracker) { + return new NonMatchedSmallTableIteratorImpl(matchTracker); + } + + @Override public void seal() { hashMap.seal(); } @@ -542,6 +553,64 @@ public GetAdaptor() { } @Override + public JoinUtil.JoinResult setFromVectorNoNulls(VectorHashKeyWrapper kw, + VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, + MatchTracker matchTracker) + throws HiveException { + if (nulls == null) { + nulls = new boolean[keyOutputWriters.length]; + currentKey = new Object[keyOutputWriters.length]; + vectorKeyOIs = new ArrayList(); + for (int i = 0; i < keyOutputWriters.length; i++) { + vectorKeyOIs.add(keyOutputWriters[i].getObjectInspector()); + } + } else { + assert nulls.length == keyOutputWriters.length; + } + boolean hasNulls = false; + for (int i = 0; i < keyOutputWriters.length; i++) { + currentKey[i] = keyWrapperBatch.getWritableKeyValue(kw, i, keyOutputWriters[i]); + if (currentKey[i] == null) { + nulls[i] = true; + hasNulls = true; + } else { + nulls[i] = false; + } + } + if (hasNulls) { + currentValue.reset(); + return JoinUtil.JoinResult.NOMATCH; + } + return currentValue.setFromOutput( + MapJoinKey.serializeRow(output, currentKey, vectorKeyOIs, + sortableSortOrders, nullMarkers, notNullMarkers), matchTracker); + } + + @Override + public void setFromVectorNoResult(VectorHashKeyWrapper kw, + VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, + MatchTracker matchTracker) + throws HiveException { + if (nulls == null) { + nulls = new boolean[keyOutputWriters.length]; + currentKey = new Object[keyOutputWriters.length]; + vectorKeyOIs = new ArrayList(); + for (int i = 0; i < keyOutputWriters.length; i++) { + vectorKeyOIs.add(keyOutputWriters[i].getObjectInspector()); + } + } else { + assert nulls.length == keyOutputWriters.length; + } + for (int i = 0; i < keyOutputWriters.length; i++) { + currentKey[i] = keyWrapperBatch.getWritableKeyValue(kw, i, keyOutputWriters[i]); + nulls[i] = currentKey[i] == null; + } + currentValue.setFromOutputNoResult( + MapJoinKey.serializeRow(output, currentKey, vectorKeyOIs, + sortableSortOrders, nullMarkers, notNullMarkers), matchTracker); + } + + @Override public JoinUtil.JoinResult setFromRow(Object row, List fields, List ois) throws HiveException { if (nulls == null) { @@ -558,6 +627,48 @@ public GetAdaptor() { } @Override + public JoinUtil.JoinResult setFromRowNoNulls(Object row, List fields, + List ois, MatchTracker matchTracker) throws HiveException { + if (nulls == null) { + nulls = new boolean[fields.size()]; + currentKey = new Object[fields.size()]; + } + boolean hasNulls = false; + for (int keyIndex = 0; keyIndex < fields.size(); ++keyIndex) { + 
currentKey[keyIndex] = fields.get(keyIndex).evaluate(row); + if (currentKey[keyIndex] == null) { + nulls[keyIndex] = true; + hasNulls = true; + } else { + nulls[keyIndex] = true; + } + } + if (hasNulls) { + currentValue.reset(); + return JoinUtil.JoinResult.NOMATCH; + } + return currentValue.setFromOutput( + MapJoinKey.serializeRow(output, currentKey, ois, + sortableSortOrders, nullMarkers, notNullMarkers), matchTracker); + } + + @Override + public void setFromRowNoResult(Object row, List fields, + List ois, MatchTracker matchTracker) throws HiveException { + if (nulls == null) { + nulls = new boolean[fields.size()]; + currentKey = new Object[fields.size()]; + } + for (int keyIndex = 0; keyIndex < fields.size(); ++keyIndex) { + currentKey[keyIndex] = fields.get(keyIndex).evaluate(row); + nulls[keyIndex] = currentKey[keyIndex] == null; + } + currentValue.setFromOutputNoResult( + MapJoinKey.serializeRow(output, currentKey, ois, + sortableSortOrders, nullMarkers, notNullMarkers), matchTracker); + } + + @Override public JoinUtil.JoinResult setFromOther(ReusableGetAdaptor other) { assert other instanceof GetAdaptor; GetAdaptor other2 = (GetAdaptor)other; @@ -591,14 +702,26 @@ public MapJoinRowContainer getCurrentRows() { @Override public JoinUtil.JoinResult setDirect(byte[] bytes, int offset, int length, - BytesBytesMultiHashMap.Result hashMapResult) { - return currentValue.setDirect(bytes, offset, length, hashMapResult); + BytesBytesMultiHashMap.Result hashMapResult, MatchTracker matchTracker) { + return currentValue.setDirect( + bytes, offset, length, hashMapResult, matchTracker); } @Override public int directSpillPartitionId() { throw new UnsupportedOperationException("Getting the spill hash partition not supported"); } + + @Override + public void setDirectNoResult(byte[] bytes, int offset, int length, + WriteBuffers.Position readPos, MatchTracker matchTracker) { + currentValue.setDirectNoResult(bytes, offset, length, readPos, matchTracker); + } + + @Override + public MatchTracker createMatchTracker() { + return new MatchTracker(hashMap.getNumHashBuckets()); + } } /** Row container that gets and deserializes the rows on demand from bytes provided. 
*/ @@ -619,6 +742,7 @@ public int directSpillPartitionId() { private final LazyBinaryStruct valueStruct; private final boolean needsComplexObjectFixup; private final ArrayList complexObjectArrayBuffer; + private final WriteBuffers.Position noResultReadPos; public ReusableRowContainer() { if (internalValueOi != null) { @@ -639,13 +763,31 @@ public ReusableRowContainer() { } uselessIndirection = new ByteArrayRef(); hashMapResult = new BytesBytesMultiHashMap.Result(); + noResultReadPos = new WriteBuffers.Position(); clearRows(); } + public BytesBytesMultiHashMap.Result getHashMapResult() { + return hashMapResult; + } + public JoinUtil.JoinResult setFromOutput(Output output) { aliasFilter = hashMap.getValueResult( - output.getData(), 0, output.getLength(), hashMapResult); + output.getData(), 0, output.getLength(), hashMapResult, /* matchTracker */ null); + dummyRow = null; + if (hashMapResult.hasRows()) { + return JoinUtil.JoinResult.MATCH; + } else { + aliasFilter = (byte) 0xff; + return JoinUtil.JoinResult.NOMATCH; + } + } + + public JoinUtil.JoinResult setFromOutput(Output output, MatchTracker matchTracker) { + + aliasFilter = hashMap.getValueResult( + output.getData(), 0, output.getLength(), hashMapResult, matchTracker); dummyRow = null; if (hashMapResult.hasRows()) { return JoinUtil.JoinResult.MATCH; @@ -653,8 +795,16 @@ public ReusableRowContainer() { aliasFilter = (byte) 0xff; return JoinUtil.JoinResult.NOMATCH; } + } - } + public void setFromOutputNoResult(Output output, MatchTracker matchTracker) { + hashMap.lookupKeyNoResult( + output.getData(), 0, output.getLength(), noResultReadPos, matchTracker); + } + + public void reset() { + hashMapResult.forget(); + } @Override public boolean hasRows() { @@ -773,8 +923,8 @@ public void write(MapJoinObjectSerDeContext valueContext, ObjectOutputStream out // Direct access. public JoinUtil.JoinResult setDirect(byte[] bytes, int offset, int length, - BytesBytesMultiHashMap.Result hashMapResult) { - aliasFilter = hashMap.getValueResult(bytes, offset, length, hashMapResult); + BytesBytesMultiHashMap.Result hashMapResult, MatchTracker matchTracker) { + aliasFilter = hashMap.getValueResult(bytes, offset, length, hashMapResult, matchTracker); dummyRow = null; if (hashMapResult.hasRows()) { return JoinUtil.JoinResult.MATCH; @@ -783,6 +933,70 @@ public void write(MapJoinObjectSerDeContext valueContext, ObjectOutputStream out return JoinUtil.JoinResult.NOMATCH; } } + + public void setDirectNoResult(byte[] bytes, int offset, int length, + WriteBuffers.Position readPos, MatchTracker matchTracker) { + hashMap.lookupKeyNoResult(bytes, offset, length, readPos, matchTracker); + } + } + + private class NonMatchedSmallTableIteratorImpl implements NonMatchedSmallTableIterator { + + private final MatchTracker matchTracker; + + private int currentIndex; + + private final WriteBuffers.ByteSegmentRef keyRef; + private final BytesWritable bytesWritable; + private final ReusableRowContainer currentValue; + + public NonMatchedSmallTableIteratorImpl(MatchTracker matchTracker) { + this.matchTracker = matchTracker; + + currentIndex = -1; + + keyRef = new WriteBuffers.ByteSegmentRef(); + bytesWritable = new BytesWritable(); + + currentValue = new ReusableRowContainer(); + } + + @Override + public boolean isNext() { + + // If another non-matched key is found, the key bytes will be referenced by keyRef, and + // our ReusableRowContainer's BytesBytesMultiHashMap.Result will reference the value rows. 
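/*
 * [Illustrative aside, not part of the patch] How these pieces are intended to fit together
 * for FULL OUTER MapJoin: the probe phase records matches in a MatchTracker, and at operator
 * close this iterator walks the small table and hands back every key that never matched so
 * NULL-extended rows can be emitted. The driver and its emitNonMatched() callback below are
 * hypothetical; only the container/iterator/tracker APIs come from this patch.
 */
import java.util.List;

import org.apache.hadoop.hive.ql.exec.persistence.MapJoinRowContainer;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator;
import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker;
import org.apache.hadoop.hive.ql.metadata.HiveException;

public class FullOuterResidualSketch {

  interface ResidualEmitter {
    void emitNonMatched(List<?> smallTableKey, MapJoinRowContainer smallTableRows)
        throws HiveException;
  }

  static void emitFullOuterResiduals(MapJoinTableContainer container,
      MatchTracker matchTracker, ResidualEmitter emitter) throws HiveException {

    NonMatchedSmallTableIterator it =
        container.createNonMatchedSmallTableIterator(matchTracker);
    while (it.isNext()) {
      // Key columns come back deserialized; the value rows stay in the reusable container.
      emitter.emitNonMatched(it.getCurrentKey(), it.getCurrentRows());
    }
  }
}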
+ currentIndex = + hashMap.findNextNonMatched( + currentIndex, keyRef, currentValue.getHashMapResult(), matchTracker); + return (currentIndex != -1); + } + + @Override + public List getCurrentKey() throws HiveException { + List deserializedList = + MapJoinKey.deserializeRow( + keyRef.getBytes(), + (int) keyRef.getOffset(), + keyRef.getLength(), + bytesWritable, keySerde); + return deserializedList; + } + + @Override + public ByteSegmentRef getCurrentKeyAsRef() { + return keyRef; + } + + @Override + public MapJoinRowContainer getCurrentRows() { + return currentValue; + } + + @Override + public BytesBytesMultiHashMap.Result getHashMapResult() { + return currentValue.getHashMapResult(); + } } public static boolean isSupportedKey(ObjectInspector keyOi) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java index 6504a5f..ac91187 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java @@ -40,6 +40,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; /** @@ -171,4 +172,18 @@ public static Output serializeRow(Output byteStream, Object[] fieldData, } return byteStream; } + + /** + * Deserializes a key. + * @param BytesWritable to reuse. + */ + public static List deserializeRow(byte[] keyBytes, int keyOffset, int keyLength, + BytesWritable bytesWritable, AbstractSerDe serde) throws HiveException { + try { + bytesWritable.set(keyBytes, keyOffset, keyLength); + return (List) serde.deserialize(bytesWritable); + } catch (SerDeException e) { + throw new HiveException("Serialization error", e); + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectSerDeContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectSerDeContext.java index 345d1f4..a57c68b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectSerDeContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectSerDeContext.java @@ -29,9 +29,14 @@ private final AbstractSerDe serde; private final boolean hasFilter; + static int fake; + public MapJoinObjectSerDeContext(AbstractSerDe serde, boolean hasFilter) throws SerDeException { this.serde = serde; + if (hasFilter) { + fake++; + } this.hasFilter = hasFilter; this.standardOI = ObjectInspectorUtils.getStandardObjectInspector(serde.getObjectInspector(), ObjectInspectorCopyOption.WRITABLE); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainer.java index b0c7574..a618f5c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainer.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.io.Writable; @@ -46,6 +47,12 @@ JoinUtil.JoinResult 
setFromVector(VectorHashKeyWrapper kw, VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch) throws HiveException; + JoinUtil.JoinResult setFromVectorNoNulls(VectorHashKeyWrapper kw, VectorExpressionWriter[] keyOutputWriters, + VectorHashKeyWrapperBatch keyWrapperBatch, MatchTracker matchTracker) throws HiveException; + + void setFromVectorNoResult(VectorHashKeyWrapper kw, VectorExpressionWriter[] keyOutputWriters, + VectorHashKeyWrapperBatch keyWrapperBatch, MatchTracker matchTracker) throws HiveException; + /** * Changes current rows to which adaptor is referring to the rows corresponding to * the key represented by a row object, and fields and ois used to interpret it. @@ -53,6 +60,15 @@ JoinUtil.JoinResult setFromRow(Object row, List fields, List ois) throws HiveException; + // Version with MatchTracker object. + JoinUtil.JoinResult setFromRowNoNulls(Object row, List fields, + List ois, MatchTracker matchTracker) + throws HiveException; + + void setFromRowNoResult(Object row, List fields, + List ois, MatchTracker matchTracker) + throws HiveException; + /** * Changes current rows to which adaptor is referring to the rows corresponding to * the key that another adaptor has already deserialized via setFromVector/setFromRow. @@ -82,6 +98,41 @@ MapJoinKey putRow(Writable currentKey, Writable currentValue) throws SerDeException, HiveException, IOException; /** + * Iterates through the Small Table hash table and returns the key and value rows for any + * non-matched keys. + */ + public interface NonMatchedSmallTableIterator { + /** + * Return true if another non-matched key was found. + */ + boolean isNext(); + + /** + * @return The current key as a desearialized object array after a successful next() call + * that returns true. + * @throws HiveException + */ + List getCurrentKey() throws HiveException; + + /** + * @return The current key as a WriteBuffers.ByteSegmentRef after a successful next() call + * that returns true. + */ + ByteSegmentRef getCurrentKeyAsRef(); + + /** + * @return The container w/the values rows for the current key after a successful next() call + * that returns true. + */ + MapJoinRowContainer getCurrentRows(); + + /** + * @return The value rows has a BytesBytesMultiHashMap result. + */ + BytesBytesMultiHashMap.Result getHashMapResult(); + } + + /** * Indicates to the container that the puts have ended; table is now r/o. */ void seal(); @@ -94,6 +145,12 @@ MapJoinKey putRow(Writable currentKey, Writable currentValue) */ ReusableGetAdaptor createGetter(MapJoinKey keyTypeFromLoader); + /** + * Creates an iterator for going through the hash table and returns the key and value rows for any + * non-matched keys + */ + NonMatchedSmallTableIterator createNonMatchedSmallTableIterator(MatchTracker matchTracker); + /** Clears the contents of the table. */ void clear(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MatchTracker.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MatchTracker.java new file mode 100644 index 0000000..a4b49fb --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MatchTracker.java @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.persistence; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class MatchTracker { + private static final Logger LOG = LoggerFactory.getLogger(MatchTracker.class); + + private final int logicalHashBucketCount; + private long[] longMatchFlags; + + public MatchTracker(int logicalHashBucketCount) { + this.logicalHashBucketCount = logicalHashBucketCount; + + final int longMatchFlagsSize = (logicalHashBucketCount + Long.SIZE - 1) / Long.SIZE; + longMatchFlags = new long[longMatchFlagsSize]; + } + + protected boolean isFirstMatch; + + public boolean getIsFirstMatch() { + return isFirstMatch; + } + + /* + * Track another match. + * @return Returns true if this is the first match. + */ + public void trackMatch(int logicalSlotNum) { + + final int longWordIndex = logicalSlotNum / Long.SIZE; + final long longBitMask = 1L << (logicalSlotNum % Long.SIZE); + if ((longMatchFlags[longWordIndex] & longBitMask) != 0) { + + // Flag is already on. + isFirstMatch = false; + } + longMatchFlags[longWordIndex] |= longBitMask; + isFirstMatch = true; + } + + /* + * @return Returns true if the slot key was matched. + */ + public boolean wasMatched(int logicalSlotNum) { + final int longWordIndex = logicalSlotNum / Long.SIZE; + final long longBitMask = 1L << (logicalSlotNum % Long.SIZE); + return (longMatchFlags[longWordIndex] & longBitMask) != 0; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ReusableGetAdaptorDirectAccess.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ReusableGetAdaptorDirectAccess.java index 3303cc4..d22740d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ReusableGetAdaptorDirectAccess.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ReusableGetAdaptorDirectAccess.java @@ -20,11 +20,16 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; +import org.apache.hadoop.hive.serde2.WriteBuffers; public interface ReusableGetAdaptorDirectAccess { JoinResult setDirect(byte[] bytes, int offset, int length, - BytesBytesMultiHashMap.Result hashMapResult); + BytesBytesMultiHashMap.Result hashMapResult, MatchTracker matchTracker); int directSpillPartitionId(); + void setDirectNoResult(byte[] bytes, int offset, int length, + WriteBuffers.Position readPos, MatchTracker matchTracker); + + MatchTracker createMatchTracker(); } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java index c4503ad..f2400b8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java @@ -136,7 +136,7 @@ public void process(Object data, int tag) throws HiveException { throw new HiveException(e); } - forward(data, rowInspector, true); + forward(data, 
rowInspector); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java index e96619c..9615869 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java @@ -20,6 +20,7 @@ import java.sql.Date; import java.sql.Timestamp; +import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -985,6 +986,17 @@ public void assignRow(VectorizedRowBatch batch, int batchIndex, Object[] objects } } + public void assignRow(VectorizedRowBatch batch, int batchIndex, ArrayList objectList) { + final int count = isConvert.length; + for (int i = 0; i < count; i++) { + if (isConvert[i]) { + assignConvertRowColumn(batch, batchIndex, i, objectList.get(i)); + } else { + assignRowColumn(batch, batchIndex, i, objectList.get(i)); + } + } + } + /* * Assign a row from a list of standard objects up to a count */ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java index bedc12a..0cf8491 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java @@ -259,14 +259,27 @@ void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBa private CopyRow[] subRowToBatchCopiersByReference; public void init(VectorColumnMapping columnMapping) throws HiveException { - int count = columnMapping.getCount(); + init( + columnMapping.getInputColumns(), + columnMapping.getOutputColumns(), + columnMapping.getTypeInfos()); + } + + public void init(int[] columnMap, TypeInfo[] typeInfos) throws HiveException { + init(columnMap, columnMap, typeInfos); + } + + public void init(int[] inputColumnMap, int[] outputColumnMap, TypeInfo[] typeInfos) + throws HiveException { + + final int count = inputColumnMap.length; subRowToBatchCopiersByValue = new CopyRow[count]; subRowToBatchCopiersByReference = new CopyRow[count]; for (int i = 0; i < count; i++) { - int inputColumn = columnMapping.getInputColumns()[i]; - int outputColumn = columnMapping.getOutputColumns()[i]; - TypeInfo typeInfo = columnMapping.getTypeInfos()[i]; + int inputColumn = inputColumnMap[i]; + int outputColumn = outputColumnMap[i]; + TypeInfo typeInfo = typeInfos[i]; Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); CopyRow copyRowByValue = null; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java index 8ea625e..c9927d3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java @@ -444,6 +444,38 @@ public void init(boolean[] columnsToIncludeTruncated) throws HiveException { } + public void init(int[] outputColumns, boolean[] columnsToInclude) throws HiveException { + + Preconditions.checkState( + outputColumns.length == columnsToInclude.length); + + final int columnCount = sourceTypeInfos.length; + allocateArrays(columnCount); + + int includedCount = 0; + final int[] includedIndices = new int[columnCount]; + + for (int i = 0; i < columnCount; i++) { + + if (!columnsToInclude[i]) { + + // Field not included in query. 
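/*
 * [Illustrative aside, not part of the patch] The new VectorCopyRow.init(int[] inputColumnMap,
 * int[] outputColumnMap, TypeInfo[] typeInfos) overload pairs position i of the two maps: input
 * column inputColumnMap[i] is copied into output column outputColumnMap[i]. A toy version with
 * plain long[][] "batches" (column-major, like ColumnVector arrays) instead of Hive types:
 */
public class ColumnMapCopySketch {

  static void copyRow(long[][] in, int inRow, long[][] out, int outRow,
      int[] inputColumnMap, int[] outputColumnMap) {
    for (int i = 0; i < inputColumnMap.length; i++) {
      out[outputColumnMap[i]][outRow] = in[inputColumnMap[i]][inRow];
    }
  }

  public static void main(String[] args) {
    long[][] in = new long[4][8];    // 4 columns, 8 rows
    long[][] out = new long[6][8];   // the output batch has a different column layout
    in[0][2] = 100;
    in[2][2] = 300;

    // Copy input columns {0, 2} of row 2 into output columns {5, 1} of row 0.
    copyRow(in, 2, out, 0, new int[] {0, 2}, new int[] {5, 1});
    System.out.println(out[5][0] + " " + out[1][0]);   // 100 300
  }
}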
+ + } else { + + initTopLevelField(i, outputColumns[i], sourceTypeInfos[i], dataTypePhysicalVariations[i]); + includedIndices[includedCount++] = i; + } + } + + // Optimizing for readField? + if (includedCount < columnCount && deserializeRead.isReadFieldSupported()) { + useReadField = true; + readFieldLogicalIndices = Arrays.copyOf(includedIndices, includedCount); + } + + } + /** * Initialize for converting the source data type that are going to be read with the * DeserializedRead interface passed to the constructor to the target data types desired in diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java index 14ac8ee..73965ad 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java @@ -133,7 +133,7 @@ public void process(Object row, int tag) throws HiveException { // All are selected, do nothing } if (vrg.size > 0) { - forward(vrg, null, true); + vectorForward(vrg); } // Restore the original selected vector diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index 75efc29..20af960 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -1170,7 +1170,7 @@ private void writeGroupRow(VectorAggregationBufferRow agg, DataOutputBuffer buff } private void flushOutput() throws HiveException { - forward(outputBatch, null, true); + vectorForward(outputBatch); outputBatch.reset(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java index 051d338..7edb059 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java @@ -88,7 +88,7 @@ public void process(Object row, int tag) throws HiveException { batch.selected[i] = batch.selected[skipSize + i]; } } - forward(row, inputObjInspectors[tag], true); + vectorForward(batch); currCount += batch.size; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java index 497b12d..e0f8636 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; @@ -25,6 +26,7 @@ import java.util.Map; import java.util.concurrent.Future; +import org.apache.commons.lang.ArrayUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -34,6 +36,7 @@ import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.ObjectContainer; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -110,6 +113,197 @@ public 
VectorizationContext getInputVectorizationContext() { return vContext; } + /* + * RESTRICTION: + * No MapJoin key or value expressions other than ExprNodeColumnDesc. Big Table key and value + * columns can be easily determined. + * + * Big Table input maps: + * // Takes some row input and tells us which are key columns and which are value columns. + * Which input columns are the key columns. + * Which input columns are the value columns. + * // E.g. [0, 2, 10] is ctinyint (type: tinyint), cint (type: int), cboolean1 (type: boolean) + * // Input column names are _col0, _col1, _col2 by SELECT + * // so 0, 2 are the keys + * // where 1 is the value + * + * Big Table retain are input Big Table column numbers kept in the output (in output order). + * + * Big Table output mapping: + * + * // When Big Table output result starts at 0, then: + * // keys are [0, 2] + * // value is [1] + * // Needed to map Map Join output result keys and values to key and value expressions + * // that represent the Big Table input row so Auxiliary RS can be created. + * // If other order, then would start at smallTableResultSize offset. + * + * How to rename Auxiliary RS output (which is Big Table input) to _colN form? + * KEY.reducesinkkey 0 .. K - 1 are _outN where N is key map [keyNum] + * VALUE._outN are _outN where N is value map [valueNum] + + */ + public static class MapJoinBigTableInfo { + + private final int[] inputKeyColumnMap; + private final int[] inputValueColumnMap; + + private final int[] outputKeyColumnMap; + private final int[] outputValueColumnMap; + + private boolean isBigTableFirst; + private final int bigTableResultSize; + private final int smallTableResultSize; + + public MapJoinBigTableInfo( + int[] inputKeyColumnMap, + int[] inputValueColumnMap, + int[] outputKeyColumnMap, + int[] outputValueColumnMap, + boolean isBigTableFirst, + int bigTableResultSize, + int smallTableResultSize) { + this.inputKeyColumnMap = inputKeyColumnMap; + this.inputValueColumnMap = inputValueColumnMap; + + this.outputKeyColumnMap = outputKeyColumnMap; + this.outputValueColumnMap = outputValueColumnMap; + + this.isBigTableFirst = isBigTableFirst; + this.bigTableResultSize = bigTableResultSize; + this.smallTableResultSize = smallTableResultSize; + } + + public int[] getInputKeyColumnMap() { + return inputKeyColumnMap; + } + public int[] getInputValueColumnMap() { + return inputValueColumnMap; + } + + public int[] getOutputKeyColumnNums () { + return outputKeyColumnMap; + } + public int[] getOutputValueColumnNums () { + return outputValueColumnMap; + } + + public boolean getIsBigTableFirst() { + return isBigTableFirst; + } + public int getBigTableResultSize() { + return bigTableResultSize; + } + public int getSmallTableResultSize() { + return smallTableResultSize; + } + } + + public static MapJoinBigTableInfo getBigTableInfo(MapJoinDesc desc) { + + final byte posBigTable = (byte) desc.getPosBigTable(); + + List keyExprs = desc.getKeys().get(posBigTable); + final int keySize = keyExprs.size(); + List bigTableExprs = desc.getExprs().get(posBigTable); + + Byte[] order = desc.getTagOrder(); + Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]); + + final int outputColumnCount = desc.getOutputColumnNames().size(); + TypeInfo[] outputTypeInfos = new TypeInfo[outputColumnCount]; + + /* + * Gather up big and small table output result information from the MapJoinDesc. 
+ */ + List bigTableRetainList = desc.getRetainList().get(posBigTable); + final int bigTableRetainSize = bigTableRetainList.size(); + + int[] smallTableIndices; + int smallTableIndicesSize; + List smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable); + if (desc.getValueIndices() != null && + desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) { + smallTableIndices = desc.getValueIndices().get(posSingleVectorMapJoinSmallTable); + smallTableIndicesSize = smallTableIndices.length; + } else { + smallTableIndices = null; + smallTableIndicesSize = 0; + } + + List smallTableRetainList = desc.getRetainList().get(posSingleVectorMapJoinSmallTable); + final int smallTableRetainSize = + (smallTableRetainList != null ? smallTableRetainList.size() : 0); + + int smallTableResultSize = 0; + if (smallTableIndicesSize > 0) { + smallTableResultSize = smallTableIndicesSize; + } else if (smallTableRetainSize > 0) { + smallTableResultSize = smallTableRetainSize; + } + + /* + * Determine the big table retained mapping first so we can optimize out (with + * projection) copying inner join big table keys in the subsequent small table results section. + */ + + List inputKeyColumnNumList = new ArrayList(); + List inputValueColumnNumList = new ArrayList(); + + int nextOutputColumn = (order[0] == posBigTable ? 0 : smallTableResultSize); + Map columnMap = new HashMap(); + for (int i = 0; i < bigTableRetainSize; i++) { + + ExprNodeColumnDesc bigTableExpr = (ExprNodeColumnDesc) bigTableExprs.get(i); + TypeInfo typeInfo = bigTableExpr.getTypeInfo(); + + outputTypeInfos[nextOutputColumn] = typeInfo; + + columnMap.put(bigTableExpr.getColumn(), i); + nextOutputColumn++; + } + + for (int i = 0; i < keySize; i++) { + ExprNodeColumnDesc keyEpxr = (ExprNodeColumnDesc) keyExprs.get(i); + inputKeyColumnNumList.add(columnMap.get(keyEpxr.getColumn())); + } + + for (int i = 0; i < bigTableRetainSize; i++) { + if (inputKeyColumnNumList.contains(i)) { + continue; + } + inputValueColumnNumList.add(i); + } + + // UNDONE: + List outputKeyColumnNumList = new ArrayList(); + List outputValueColumnNumList = new ArrayList(); + + // UNDONE + outputKeyColumnNumList.addAll(inputKeyColumnNumList); + outputValueColumnNumList.addAll(inputValueColumnNumList); + + int[] inputKeyColumnNums = + ArrayUtils.toPrimitive(inputKeyColumnNumList.toArray(new Integer[0])); + int[] inputValueColumnNums = + ArrayUtils.toPrimitive(inputValueColumnNumList.toArray(new Integer[0])); + int[] outputKeyColumnNums = + ArrayUtils.toPrimitive(outputKeyColumnNumList.toArray(new Integer[0])); + int[] outputValueColumnNums = + ArrayUtils.toPrimitive(outputValueColumnNumList.toArray(new Integer[0])); + + boolean isBigTableFirst = (order[0] == posBigTable); + + return new MapJoinBigTableInfo( + inputKeyColumnNums, + inputValueColumnNums, + outputKeyColumnNums, + outputValueColumnNums, + isBigTableFirst, + bigTableRetainSize, + smallTableResultSize); + } + public static TypeInfo[] getOutputTypeInfos(MapJoinDesc desc) { final byte posBigTable = (byte) desc.getPosBigTable(); @@ -132,7 +326,8 @@ public VectorizationContext getInputVectorizationContext() { int[] smallTableIndices; int smallTableIndicesSize; List smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable); - if (desc.getValueIndices() != null && desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) { + if (desc.getValueIndices() != null && + desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) { smallTableIndices = 
desc.getValueIndices().get(posSingleVectorMapJoinSmallTable); smallTableIndicesSize = smallTableIndices.length; } else { @@ -141,7 +336,8 @@ public VectorizationContext getInputVectorizationContext() { } List smallTableRetainList = desc.getRetainList().get(posSingleVectorMapJoinSmallTable); - final int smallTableRetainSize = smallTableRetainList.size(); + final int smallTableRetainSize = + (smallTableRetainList != null ? smallTableRetainList.size() : 0); int smallTableResultSize = 0; if (smallTableIndicesSize > 0) { @@ -216,6 +412,7 @@ public VectorizationContext getInputVectorizationContext() { return outputTypeInfos; } + @Override public void initializeOp(Configuration hconf) throws HiveException { super.initializeOp(hconf); @@ -234,7 +431,6 @@ public void initializeOp(Configuration hconf) throws HiveException { */ @Override protected void internalForward(Object row, ObjectInspector outputOI) throws HiveException { - Object[] values = (Object[]) row; VectorAssignRow va = outputVectorAssignRowMap.get(outputOI); if (va == null) { va = new VectorAssignRow(); @@ -242,7 +438,11 @@ protected void internalForward(Object row, ObjectInspector outputOI) throws Hive outputVectorAssignRowMap.put(outputOI, va); } - va.assignRow(outputBatch, outputBatch.size, values); + if (row instanceof ArrayList) { + va.assignRow(outputBatch, outputBatch.size, (ArrayList) row); + } else { + va.assignRow(outputBatch, outputBatch.size, (Object[]) row); + } ++outputBatch.size; if (outputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) { @@ -251,7 +451,7 @@ protected void internalForward(Object row, ObjectInspector outputOI) throws Hive } private void flushOutput() throws HiveException { - forward(outputBatch, null, true); + vectorForward(outputBatch); outputBatch.reset(); } @@ -263,8 +463,10 @@ public void closeOp(boolean aborted) throws HiveException { tableContainer.dumpMetrics(); } } - if (!aborted && 0 < outputBatch.size) { - flushOutput(); + if (!aborted) { + if (outputBatch.size > 0) { + flushOutput(); + } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java index a84bd72..28eb48d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java @@ -19,8 +19,13 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.TreeSet; + +import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.CompilationOpContext; @@ -28,6 +33,8 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinBaseOperator.MapJoinBigTableInfo; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory; @@ -76,6 +83,12 @@ private VectorExpressionWriter[] rowWriters; // Writer for producing row from input batch protected transient Object[] singleRow; + private transient VectorCopyRow 
auxiliaryVectorCopy; + + private transient VectorizedRowBatch auxiliaryOutputBatch; + + private transient int[] auxiliaryNullColumnNums; + /** Kryo ctor. */ @VisibleForTesting public VectorMapJoinOperator() { @@ -195,6 +208,81 @@ protected Object _evaluate(Object row, int version) throws HiveException { } @Override + protected JoinUtil.JoinResult setMapJoinKeyNoNulls(ReusableGetAdaptor dest, Object row, byte alias, + MatchTracker matchTracker) + throws HiveException { + return dest.setFromVectorNoNulls(keyValues[batchIndex], keyOutputWriters, keyWrapperBatch, + matchTracker); + } + + @Override + protected void setMapJoinKeyNoResult( + ReusableGetAdaptor dest, Object row, byte alias, MatchTracker matchTracker) + throws HiveException { + dest.setFromVectorNoResult(keyValues[batchIndex], keyOutputWriters, keyWrapperBatch, + matchTracker); + } + + private int[] getNonBigKeyNullColumnNums(MapJoinBigTableInfo mapJoinBigTableInfo) { + Set auxiliaryNullColumnSet = new TreeSet(); + for (Integer i : mapJoinBigTableInfo.getInputValueColumnMap()) { + auxiliaryNullColumnSet.add(i); + } + int smallTableColumnNum = + (mapJoinBigTableInfo.getIsBigTableFirst() ? + mapJoinBigTableInfo.getBigTableResultSize() : 0); + for (int i = 0; i < mapJoinBigTableInfo.getSmallTableResultSize(); i++) { + auxiliaryNullColumnSet.add(i); + } + ArrayList auxiliaryNullColumnList = new ArrayList(); + auxiliaryNullColumnList.addAll(auxiliaryNullColumnSet); + return ArrayUtils.toPrimitive(auxiliaryNullColumnList.toArray(new Integer[0])); + } + + @Override + protected void forwardFirstTimeMatchToFullOuterIntersect( + Object firstTimeMatchRow, ObjectInspector outputOI) + throws HiveException { + + if (auxiliaryVectorCopy == null) { + + MapJoinBigTableInfo mapJoinBigTableInfo = getBigTableInfo(conf); + int[] inputKeyColumnMap = mapJoinBigTableInfo.getInputKeyColumnMap(); + + auxiliaryVectorCopy = new VectorCopyRow(); + auxiliaryVectorCopy.init( + inputKeyColumnMap, + mapJoinBigTableInfo.getOutputKeyColumnNums(), + Arrays.copyOf(vOutContext.getInitialTypeInfos(), inputKeyColumnMap.length)); + + auxiliaryOutputBatch = VectorizedBatchUtil.makeLike(outputBatch); + + auxiliaryNullColumnNums = + getNonBigKeyNullColumnNums(mapJoinBigTableInfo); + } + + VectorizedRowBatch inBatch = (VectorizedRowBatch) firstTimeMatchRow; + auxiliaryVectorCopy.copyByValue( + inBatch, batchIndex, + auxiliaryOutputBatch, auxiliaryOutputBatch.size); + for (int columnNum : auxiliaryNullColumnNums) { + ColumnVector colVector = auxiliaryOutputBatch.cols[columnNum]; + colVector.isNull[auxiliaryOutputBatch.size] = true; + colVector.noNulls = true; + } + + ++auxiliaryOutputBatch.size; + if (auxiliaryOutputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) { + flushAuxiliaryOutput(); + } + } + + private void flushAuxiliaryOutput() throws HiveException { + vectorForwardAuxiliary(auxiliaryOutputBatch); + auxiliaryOutputBatch.reset(); + } + + @Override public void process(Object row, int tag) throws HiveException { VectorizedRowBatch inBatch = (VectorizedRowBatch) row; @@ -240,6 +328,16 @@ public void process(Object row, int tag) throws HiveException { } @Override + public void closeOp(boolean aborted) throws HiveException { + super.closeOp(aborted); + if (!aborted) { + if (auxiliaryOutputBatch != null && auxiliaryOutputBatch.size > 0) { + flushAuxiliaryOutput(); + } + } + } + + @Override protected void spillBigTableRow(MapJoinTableContainer hybridHtContainer, Object row) throws HiveException { // Extract the actual row from row batch diff --git 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java index 35f810f..a88d2c6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java @@ -324,7 +324,7 @@ protected void internalForward(Object row, ObjectInspector outputOI) throws Hive } private void flushOutput() throws HiveException { - forward(outputBatch, null, true); + vectorForward(outputBatch); outputBatch.reset(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java index 22d2f34..2f296c9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java @@ -136,7 +136,7 @@ public void process(Object row, int tag) throws HiveException { // Just forward the row as is if (conf.isSelStarNoCompute()) { - forward(row, inputObjInspectors[tag], true); + vectorForward((VectorizedRowBatch) row); return; } @@ -155,7 +155,7 @@ public void process(Object row, int tag) throws HiveException { int originalProjectionSize = vrg.projectionSize; vrg.projectionSize = projectedOutputColumns.length; vrg.projectedColumns = this.projectedOutputColumns; - forward(vrg, outputObjInspector, true); + vectorForward((VectorizedRowBatch) row); // Revert the projected columns back, because vrg will be re-used. vrg.projectionSize = originalProjectionSize; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java index c832cdb..eb9b09a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java @@ -22,6 +22,7 @@ import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import org.apache.commons.lang.ArrayUtils; import org.slf4j.Logger; @@ -51,6 +52,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.optimized.VectorMapJoinOptimizedCreateHashTable; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTable; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMultiSet; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinTableContainer; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastHashTableLoader; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -61,11 +63,20 @@ import org.apache.hadoop.hive.ql.plan.VectorDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo; import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; import 
org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -124,6 +135,10 @@ protected void initLoggingPrefix(String className) { // a mixture of input big table columns and new scratch columns. protected VectorizationContext vOutContext; + protected VectorMapJoinVariation vectorMapJoinVariation; + protected HashTableKind hashTableKind; + protected HashTableKeyType hashTableKeyType; + // The output column projection of the vectorized row batch. And, the type infos of the output // columns. protected int[] outputProjection; @@ -149,28 +164,70 @@ protected void initLoggingPrefix(String className) { protected String[] bigTableValueColumnNames; protected TypeInfo[] bigTableValueTypeInfos; - // This is a mapping of which big table columns (input and key/value expressions) will be - // part of the big table portion of the join output result. - protected VectorColumnOutputMapping bigTableRetainedMapping; + /* + * NOTE: + * The Big Table key columns are from the key expressions. + * The Big Table value columns are from the getExpr(posBigTable) expressions. + * Any calculations needed for those will be scratch columns. + * + * The Small Table key and value output columns are scratch columns. + * + * Big Table Retain Column Map / TypeInfos: + * Any Big Table Batch columns that will be in the output result. + * 0, 1, ore more Column Nums and TypeInfos + * + * Non Outer Small Table Key Mapping: + * For non-[FULL] OUTER MapJoin, when Big Table key columns are not retained for the output + * result but are needed for the Small Table output result, they are put in this mapping + * as they are required for copying rows to the overflow batch. + * + * Outer Small Table Key Mapping + * For [FULL] OUTER MapJoin, the mapping for any Small Table key columns needed for the + * output result from the Big Table key columns. The Big Table keys cannot be projected since + * on NOMATCH there must be a physical column present to hold the non-match NULL. + * + * Full Outer Small Table Key Mapping + * For FULL OUTER MapJoin, the mapping from any needed Small Table key columns to their area + * in the output result. + * + * For deserializing a FULL OUTER non-match Small Table key into the output result. + * Can be partial or empty if some or all Small Table key columns are not retained. + * + * Small Table Value Mapping + * The mapping from Small Table value columns to their area in the output result. + * + * For deserializing Small Table value into the output result. + * + * It is the Small Table value index to output column numbers and TypeInfos. + * That is, a mapping of the LazyBinary field order to output batch scratch columns for the + * small table portion. + * Or, to use the output column nums for OUTER Small Table value NULLs. 
+ * + */ + protected int[] bigTableRetainColumnMap; + protected TypeInfo[] bigTableRetainTypeInfos; + + protected int[] nonOuterSmallTableKeyColumnMap; + protected TypeInfo[] nonOuterSmallTableKeyTypeInfos; + + protected VectorColumnOutputMapping outerSmallTableKeyMapping; - // This is a mapping of which keys will be copied from the big table (input and key expressions) - // to the small table result portion of the output for outer join. - protected VectorColumnOutputMapping bigTableOuterKeyMapping; + protected VectorColumnSourceMapping fullOuterSmallTableKeyMapping; - // This is a mapping of the values in the small table hash table that will be copied to the - // small table result portion of the output. That is, a mapping of the LazyBinary field order - // to output batch scratch columns for the small table portion. - protected VectorColumnSourceMapping smallTableMapping; + protected VectorColumnSourceMapping smallTableValueMapping; + // The MapJoin output result projection for both the Big Table input batch and the overflow batch. protected VectorColumnSourceMapping projectionMapping; // These are the output columns for the small table and the outer small table keys. - protected int[] smallTableOutputVectorColumns; - protected int[] bigTableOuterKeyOutputVectorColumns; + protected int[] outerSmallTableKeyColumnMap; + protected int[] smallTableValueColumnMap; // These are the columns in the big and small table that are ByteColumnVector columns. // We create data buffers for these columns so we can copy strings into those columns by value. protected int[] bigTableByteColumnVectorColumns; + protected int[] nonOuterSmallTableKeyByteColumnVectorColumns; + protected int[] outerSmallTableKeyByteColumnVectorColumns; protected int[] smallTableByteColumnVectorColumns; // The above members are initialized by the constructor and must not be @@ -186,13 +243,22 @@ protected void initLoggingPrefix(String className) { // portion of the join output. protected transient VectorCopyRow bigTableRetainedVectorCopy; + // This helper object deserializes BinarySortable format small table keys into columns of a row + // in a vectorized row batch. + protected int[] allSmallTableKeyColumnNums; + protected boolean[] allSmallTableKeyColumnIncluded; + protected transient VectorDeserializeRow smallTableKeyOuterVectorDeserializeRow; + + protected transient VectorCopyRow nonOuterSmallTableKeyVectorCopy; + + // UNDONE // A helper object that efficiently copies the big table key columns (input or key expressions) - // that appear in the small table portion of the join output for outer joins. - protected transient VectorCopyRow bigTableVectorCopyOuterKeys; + // that appear in the small table portion of the join output. + protected transient VectorCopyRow outerSmallTableKeyVectorCopy; // This helper object deserializes LazyBinary format small table values into columns of a row // in a vectorized row batch. - protected transient VectorDeserializeRow smallTableVectorDeserializeRow; + protected transient VectorDeserializeRow smallTableValueVectorDeserializeRow; // This a 2nd batch with the same "column schema" as the big table batch that can be used to // build join output results in. If we can create some join output results in the big table @@ -207,6 +273,9 @@ protected void initLoggingPrefix(String className) { // Whether the native vectorized map join operator has performed its common setup. 
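To make the mapping comment above concrete: per output row, a column is either copied from the Big Table batch (bigTableRetainColumnMap), copied or deserialized from the Small Table side, or NULL-filled when the Small Table side has no match. A minimal standalone sketch of the NULL-fill case, assuming only hive-exec's VectorizedRowBatch and ColumnVector on the classpath (the class and method names here are illustrative, not part of the patch):

import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class SmallTableNullFillSketch {

  // NULL-fill the Small Table output area of one result row, e.g. for an OUTER
  // non-match. The column numbers would come from maps such as
  // smallTableValueColumnMap or outerSmallTableKeyColumnMap above.
  public static void markSmallTableSideNull(
      VectorizedRowBatch batch, int batchIndex, int[] smallTableOutputColumnNums) {
    for (int columnNum : smallTableOutputColumnNums) {
      ColumnVector colVector = batch.cols[columnNum];
      colVector.noNulls = false;
      colVector.isNull[batchIndex] = true;
    }
  }
}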
protected transient boolean needCommonSetup; + // Whether the native vectorized map join operator has performed its first batch setup. + protected transient boolean needFirstBatchSetup; + // Whether the native vectorized map join operator has performed its // native vector map join hash table setup. protected transient boolean needHashTableSetup; @@ -214,6 +283,9 @@ protected void initLoggingPrefix(String className) { // The small table hash table for the native vectorized map join operator. protected transient VectorMapJoinHashTable vectorMapJoinHashTable; + protected transient long batchCounter; + protected transient long rowCounter; + /** Kryo ctor. */ protected VectorMapJoinCommonOperator() { super(); @@ -246,9 +318,9 @@ public VectorMapJoinCommonOperator(CompilationOpContext ctx, OperatorDesc conf, posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]); isOuterJoin = !desc.getNoOuterJoin(); - Map> filterExpressions = desc.getFilters(); - bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get(posBigTable), - VectorExpressionDescriptor.Mode.FILTER); + vectorMapJoinVariation = this.vectorDesc.getVectorMapJoinVariation(); + hashTableKind = this.vectorDesc.getHashTableKind(); + hashTableKeyType = this.vectorDesc.getHashTableKeyType(); bigTableKeyColumnMap = vectorMapJoinInfo.getBigTableKeyColumnMap(); bigTableKeyColumnNames = vectorMapJoinInfo.getBigTableKeyColumnNames(); @@ -260,11 +332,19 @@ public VectorMapJoinCommonOperator(CompilationOpContext ctx, OperatorDesc conf, bigTableValueTypeInfos = vectorMapJoinInfo.getBigTableValueTypeInfos(); bigTableValueExpressions = vectorMapJoinInfo.getSlimmedBigTableValueExpressions(); - bigTableRetainedMapping = vectorMapJoinInfo.getBigTableRetainedMapping(); + bigTableFilterExpressions = vectorMapJoinInfo.getBigTableFilterExpressions(); + + bigTableRetainColumnMap = vectorMapJoinInfo.getBigTableRetainColumnMap(); + bigTableRetainTypeInfos = vectorMapJoinInfo.getBigTableRetainTypeInfos(); + + nonOuterSmallTableKeyColumnMap = vectorMapJoinInfo.getNonOuterSmallTableKeyColumnMap(); + nonOuterSmallTableKeyTypeInfos = vectorMapJoinInfo.getNonOuterSmallTableKeyTypeInfos(); - bigTableOuterKeyMapping = vectorMapJoinInfo.getBigTableOuterKeyMapping(); + outerSmallTableKeyMapping = vectorMapJoinInfo.getOuterSmallTableKeyMapping(); - smallTableMapping = vectorMapJoinInfo.getSmallTableMapping(); + fullOuterSmallTableKeyMapping = vectorMapJoinInfo.getFullOuterSmallTableKeyMapping(); + + smallTableValueMapping = vectorMapJoinInfo.getSmallTableValueMapping(); projectionMapping = vectorMapJoinInfo.getProjectionMapping(); @@ -273,47 +353,73 @@ public VectorMapJoinCommonOperator(CompilationOpContext ctx, OperatorDesc conf, protected void determineCommonInfo(boolean isOuter) throws HiveException { - bigTableOuterKeyOutputVectorColumns = bigTableOuterKeyMapping.getOutputColumns(); - smallTableOutputVectorColumns = smallTableMapping.getOutputColumns(); + outerSmallTableKeyColumnMap = outerSmallTableKeyMapping.getOutputColumns(); + + smallTableValueColumnMap = smallTableValueMapping.getOutputColumns(); // Which big table and small table columns are ByteColumnVector and need have their data buffer // to be manually reset for some join result processing? 
- bigTableByteColumnVectorColumns = getByteColumnVectorColumns(bigTableOuterKeyMapping); + bigTableByteColumnVectorColumns = + getByteColumnVectorColumns(bigTableRetainColumnMap, bigTableRetainTypeInfos); + + nonOuterSmallTableKeyByteColumnVectorColumns = + getByteColumnVectorColumns(nonOuterSmallTableKeyColumnMap, nonOuterSmallTableKeyTypeInfos); - smallTableByteColumnVectorColumns = getByteColumnVectorColumns(smallTableMapping); + outerSmallTableKeyByteColumnVectorColumns = + getByteColumnVectorColumns(outerSmallTableKeyMapping); + + smallTableByteColumnVectorColumns = + getByteColumnVectorColumns(smallTableValueMapping); outputProjection = projectionMapping.getOutputColumns(); outputTypeInfos = projectionMapping.getTypeInfos(); - if (LOG.isDebugEnabled()) { + if (LOG.isInfoEnabled()) { int[] orderDisplayable = new int[order.length]; for (int i = 0; i < order.length; i++) { orderDisplayable[i] = (int) order[i]; } - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor order " + Arrays.toString(orderDisplayable)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor posBigTable " + (int) posBigTable); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor posSingleVectorMapJoinSmallTable " + (int) posSingleVectorMapJoinSmallTable); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor order " + Arrays.toString(orderDisplayable)); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor posBigTable " + (int) posBigTable); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor posSingleVectorMapJoinSmallTable " + (int) posSingleVectorMapJoinSmallTable); + + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnMap " + Arrays.toString(bigTableKeyColumnMap)); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnNames " + Arrays.toString(bigTableKeyColumnNames)); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyTypeInfos " + Arrays.toString(bigTableKeyTypeInfos)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnMap " + Arrays.toString(bigTableKeyColumnMap)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnNames " + Arrays.toString(bigTableKeyColumnNames)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyTypeInfos " + Arrays.toString(bigTableKeyTypeInfos)); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueColumnMap " + Arrays.toString(bigTableValueColumnMap)); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueColumnNames " + Arrays.toString(bigTableValueColumnNames)); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueTypeNames " + Arrays.toString(bigTableValueTypeInfos)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueColumnMap " + Arrays.toString(bigTableValueColumnMap)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueColumnNames " + Arrays.toString(bigTableValueColumnNames)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueTypeNames " + Arrays.toString(bigTableValueTypeInfos)); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor getBigTableRetainColumnMap " + 
Arrays.toString(bigTableRetainColumnMap)); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableRetainTypeInfos " + Arrays.toString(bigTableRetainTypeInfos)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableRetainedMapping " + bigTableRetainedMapping.toString()); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor nonOuterSmallTableKeyColumnMap " + Arrays.toString(nonOuterSmallTableKeyColumnMap)); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor nonOuterSmallTableKeyTypeInfos " + Arrays.toString(nonOuterSmallTableKeyTypeInfos)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableOuterKeyMapping " + bigTableOuterKeyMapping.toString()); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outerSmallTableKeyMapping " + outerSmallTableKeyMapping.toString()); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor smallTableMapping " + smallTableMapping.toString()); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor fullOuterSmallTableKeyMapping " + fullOuterSmallTableKeyMapping.toString()); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableByteColumnVectorColumns " + Arrays.toString(bigTableByteColumnVectorColumns)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor smallTableByteColumnVectorColumns " + Arrays.toString(smallTableByteColumnVectorColumns)); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor smallTableValueMapping " + smallTableValueMapping.toString()); + + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableByteColumnVectorColumns " + Arrays.toString(bigTableByteColumnVectorColumns)); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor smallTableByteColumnVectorColumns " + Arrays.toString(smallTableByteColumnVectorColumns)); + + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputProjection " + Arrays.toString(outputProjection)); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputTypeInfos " + Arrays.toString(outputTypeInfos)); + + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor mapJoinDesc.getKeysString " + conf.getKeysString()); + if (conf.getValueIndices() != null) { + for (Entry entry : conf.getValueIndices().entrySet()) { + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor mapJoinDesc.getValueIndices +" + + (int) entry.getKey() + " " + Arrays.toString(entry.getValue())); + } + } + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor mapJoinDesc.getExprs " + conf.getExprs().toString()); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor mapJoinDesc.getRetainList " + conf.getRetainList().toString()); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputProjection " + Arrays.toString(outputProjection)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputTypeInfos " + Arrays.toString(outputTypeInfos)); } setupVOutContext(conf.getOutputColumnNames()); @@ -323,11 +429,14 @@ protected void determineCommonInfo(boolean isOuter) throws HiveException { * Determine from a mapping which columns are BytesColumnVector columns. 
*/ private int[] getByteColumnVectorColumns(VectorColumnMapping mapping) { + return getByteColumnVectorColumns(mapping.getOutputColumns(), mapping.getTypeInfos()); + } + + private int[] getByteColumnVectorColumns(int[] outputColumns, TypeInfo[] typeInfos) { + // Search mapping for any strings and return their output columns. ArrayList list = new ArrayList(); - int count = mapping.getCount(); - int[] outputColumns = mapping.getOutputColumns(); - TypeInfo[] typeInfos = mapping.getTypeInfos(); + final int count = outputColumns.length; for (int i = 0; i < count; i++) { int outputColumn = outputColumns[i]; String typeName = typeInfos[i].getTypeName(); @@ -386,9 +495,57 @@ protected HashTableLoader getHashTableLoader(Configuration hconf) { return hashTableLoader; } + private void initializeFullOuterObjects() throws HiveException { + + // The Small Table key type jnfo is the same as Big Table's. + TypeInfo[] smallTableKeyTypeInfos = bigTableKeyTypeInfos; + final int allKeysSize = smallTableKeyTypeInfos.length; + + /* + * The VectorMapJoinFullOuter[Intersect]{Long|MultiKey|String}Operator outputs 0, 1, or more + * Small Key columns in the join result. + */ + allSmallTableKeyColumnNums = new int[allKeysSize]; + Arrays.fill(allSmallTableKeyColumnNums, -1); + allSmallTableKeyColumnIncluded = new boolean[allKeysSize]; + + final int outputKeysSize = fullOuterSmallTableKeyMapping.getCount(); + int[] outputKeyNums = fullOuterSmallTableKeyMapping.getInputColumns(); + int[] outputKeyOutputColumns = fullOuterSmallTableKeyMapping.getOutputColumns(); + for (int i = 0; i < outputKeysSize; i++) { + final int outputKeyNum = outputKeyNums[i]; + allSmallTableKeyColumnNums[outputKeyNum] = outputKeyOutputColumns[i]; + allSmallTableKeyColumnIncluded[outputKeyNum] = true; + } + + if (hashTableKeyType == HashTableKeyType.MULTI_KEY && + outputKeysSize > 0) { + + // UNDONE: boolean[] columnSortOrderIsDesc, byte[] columnNullMarker, byte[] columnNotNullMarker + smallTableKeyOuterVectorDeserializeRow = + new VectorDeserializeRow( + new BinarySortableDeserializeRead( + smallTableKeyTypeInfos, + /* useExternalBuffer */ true)); + smallTableKeyOuterVectorDeserializeRow.init( + allSmallTableKeyColumnNums, allSmallTableKeyColumnIncluded); + } + } + @Override protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER && + !conf.isDynamicPartitionHashJoin() && + !conf.isFullOuterIntersect()) { + + // The auxiliary forward sends first-time match keys to the FULL OUTER INTERCEPT MapJoin + // operator.. + auxiliaryChildIndex = 1; + } + VectorExpression.doTransientInit(bigTableFilterExpressions); VectorExpression.doTransientInit(bigTableKeyExpressions); VectorExpression.doTransientInit(bigTableValueExpressions); @@ -405,23 +562,34 @@ protected void initializeOp(Configuration hconf) throws HiveException { /* * Create our vectorized copy row and deserialize row helper objects. 
*/ - if (smallTableMapping.getCount() > 0) { - smallTableVectorDeserializeRow = + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { + initializeFullOuterObjects(); + } + + if (smallTableValueMapping.getCount() > 0) { + smallTableValueVectorDeserializeRow = new VectorDeserializeRow( new LazyBinaryDeserializeRead( - smallTableMapping.getTypeInfos(), + smallTableValueMapping.getTypeInfos(), /* useExternalBuffer */ true)); - smallTableVectorDeserializeRow.init(smallTableMapping.getOutputColumns()); + smallTableValueVectorDeserializeRow.init(smallTableValueMapping.getOutputColumns()); } - if (bigTableRetainedMapping.getCount() > 0) { + if (bigTableRetainColumnMap.length > 0) { bigTableRetainedVectorCopy = new VectorCopyRow(); - bigTableRetainedVectorCopy.init(bigTableRetainedMapping); + bigTableRetainedVectorCopy.init( + bigTableRetainColumnMap, bigTableRetainTypeInfos); + } + + if (nonOuterSmallTableKeyColumnMap.length > 0) { + nonOuterSmallTableKeyVectorCopy = new VectorCopyRow(); + nonOuterSmallTableKeyVectorCopy.init( + nonOuterSmallTableKeyColumnMap, nonOuterSmallTableKeyTypeInfos); } - if (bigTableOuterKeyMapping.getCount() > 0) { - bigTableVectorCopyOuterKeys = new VectorCopyRow(); - bigTableVectorCopyOuterKeys.init(bigTableOuterKeyMapping); + if (outerSmallTableKeyMapping.getCount() > 0) { + outerSmallTableKeyVectorCopy = new VectorCopyRow(); + outerSmallTableKeyVectorCopy.init(outerSmallTableKeyMapping); } /* @@ -430,6 +598,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { overflowBatch = setupOverflowBatch(); needCommonSetup = true; + needFirstBatchSetup = true; needHashTableSetup = true; if (LOG.isDebugEnabled()) { @@ -555,27 +724,49 @@ private void allocateOverflowBatchColumnVector(VectorizedRowBatch overflowBatch, /* * Common one time setup by native vectorized map join operator's processOp. */ - protected void commonSetup(VectorizedRowBatch batch) throws HiveException { + protected void commonSetup() throws HiveException { - if (LOG.isDebugEnabled()) { - LOG.debug("VectorMapJoinInnerCommonOperator commonSetup begin..."); - displayBatchColumns(batch, "batch"); - displayBatchColumns(overflowBatch, "overflowBatch"); + /* + * Make sure big table BytesColumnVectors have room for string values in the overflow batch... + */ + for (int column: bigTableByteColumnVectorColumns) { + BytesColumnVector bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column]; + bytesColumnVector.initBuffer(); } - // Make sure big table BytesColumnVectors have room for string values in the overflow batch... - for (int column: bigTableByteColumnVectorColumns) { + for (int column : nonOuterSmallTableKeyByteColumnVectorColumns) { + BytesColumnVector bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column]; + bytesColumnVector.initBuffer(); + } + + for (int column : outerSmallTableKeyByteColumnVectorColumns) { BytesColumnVector bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column]; bytesColumnVector.initBuffer(); } + for (int column: smallTableByteColumnVectorColumns) { + BytesColumnVector bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column]; + bytesColumnVector.initBuffer(); + } + + batchCounter = 0; + rowCounter = 0; + } + + static int fake; + + /* + * Common one time setup by native vectorized map join operator's first batch. + */ + public void firstBatchSetup(VectorizedRowBatch batch) throws HiveException { // Make sure small table BytesColumnVectors have room for string values in the big table and // overflow batchs... 
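The helper wiring done in initializeOp above reduces to the pattern below. The sketch assumes hive-exec's VectorDeserializeRow and LazyBinaryDeserializeRead; the TypeInfos and output column numbers stand in for smallTableValueMapping, and the class name is made up:

import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

public class SmallTableValueDeserializeSketch {

  private VectorDeserializeRow<LazyBinaryDeserializeRead> smallTableValueDeserializeRow;

  // One-time wiring, mirroring initializeOp: build a LazyBinary reader over the
  // Small Table value TypeInfos and point it at the output scratch columns.
  public void init(TypeInfo[] smallTableValueTypeInfos, int[] smallTableValueOutputColumns)
      throws Exception {
    smallTableValueDeserializeRow =
        new VectorDeserializeRow<LazyBinaryDeserializeRead>(
            new LazyBinaryDeserializeRead(
                smallTableValueTypeInfos,
                /* useExternalBuffer */ true));
    smallTableValueDeserializeRow.init(smallTableValueOutputColumns);
  }

  // Per matched row: point the reader at the hash map's serialized value bytes and
  // deserialize them by reference into the target row of the (overflow) batch.
  public void writeValue(VectorizedRowBatch batch, int batchIndex,
      byte[] bytes, int offset, int length) throws Exception {
    smallTableValueDeserializeRow.setBytes(bytes, offset, length);
    smallTableValueDeserializeRow.deserializeByRef(batch, batchIndex);
  }
}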
for (int column: smallTableByteColumnVectorColumns) { + if (column >= batch.cols.length) { + fake++; + } BytesColumnVector bytesColumnVector = (BytesColumnVector) batch.cols[column]; bytesColumnVector.initBuffer(); - bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column]; - bytesColumnVector.initBuffer(); } // Setup a scratch batch that will be used to play back big table rows that were spilled @@ -583,6 +774,54 @@ protected void commonSetup(VectorizedRowBatch batch) throws HiveException { spillReplayBatch = VectorizedBatchUtil.makeLike(batch); } + public void hashTableSetup() throws HiveException { + } + + public abstract void processBatch(VectorizedRowBatch batch) throws HiveException; + + @Override + public void process(Object row, int tag) throws HiveException { + + VectorizedRowBatch batch = (VectorizedRowBatch) row; + alias = (byte) tag; + + if (needCommonSetup) { + + // Our one time process method initialization. + commonSetup(); + + needCommonSetup = false; + } + + if (needFirstBatchSetup) { + + // Our one time first-batch method initialization. + firstBatchSetup(batch); + + needFirstBatchSetup = false; + } + + if (needHashTableSetup) { + + // Setup our hash table specialization. It will be the first time the process + // method is called, or after a Hybrid Grace reload. + + hashTableSetup(); + + needHashTableSetup = false; + } + + batchCounter++; + + if (batch.size == 0) { + return; + } + + rowCounter += batch.size; + + processBatch(batch); + } + protected void displayBatchColumns(VectorizedRowBatch batch, String batchName) { LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator commonSetup " + batchName + " column count " + batch.numCols); for (int column = 0; column < batch.numCols; column++) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectLongOperator.java new file mode 100644 index 0000000..97d6d68 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectLongOperator.java @@ -0,0 +1,119 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +import java.io.IOException; + +// import org.slf4j.Logger; +// import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +// Single-Column Long specific imports. 
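Stripped of the join details, the new process() template that the specialized operators now share (commonSetup once, firstBatchSetup on the first batch, hashTableSetup after every hash-table load or Hybrid Grace reload, then per-batch processBatch) looks like the control-flow sketch below; it is an illustration, not the operator code:

// Condensed control-flow sketch of the process() template introduced above.
public abstract class BatchLifecycleSketch {

  protected boolean needCommonSetup = true;
  protected boolean needFirstBatchSetup = true;
  protected boolean needHashTableSetup = true;

  protected long batchCounter;
  protected long rowCounter;

  protected abstract void commonSetup() throws Exception;
  protected abstract void firstBatchSetup(Object batch) throws Exception;
  protected abstract void hashTableSetup() throws Exception;
  protected abstract void processBatch(Object batch, int batchSize) throws Exception;

  public void process(Object batch, int batchSize) throws Exception {
    if (needCommonSetup) {
      commonSetup();                 // Allocations that do not depend on the batch.
      needCommonSetup = false;
    }
    if (needFirstBatchSetup) {
      firstBatchSetup(batch);        // Allocations shaped like the first batch.
      needFirstBatchSetup = false;
    }
    if (needHashTableSetup) {
      hashTableSetup();              // Re-done after a Hybrid Grace reload.
      needHashTableSetup = false;
    }
    batchCounter++;
    if (batchSize == 0) {
      return;                        // Empty batches are counted but not processed.
    }
    rowCounter += batchSize;
    processBatch(batch, batchSize);
  }
}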
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; + +/* + * Specialized class for doing a vectorized map join that is a full outer join on a Single-Column + * Long using a hash map. + */ +public class VectorMapJoinFullOuterIntersectLongOperator extends VectorMapJoinOuterLongOperator { + private static final long serialVersionUID = 1L; + + //------------------------------------------------------------------------------------------------ + + private static final String CLASS_NAME = VectorMapJoinFullOuterIntersectLongOperator.class.getName(); + // private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + /** Kryo ctor. */ + protected VectorMapJoinFullOuterIntersectLongOperator() { + super(); + } + + public VectorMapJoinFullOuterIntersectLongOperator(CompilationOpContext ctx) { + super(ctx); + } + + public VectorMapJoinFullOuterIntersectLongOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + fullOuterIntersectHashTableSetup(); + } + + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { + + try { + + /* + * Only do key matching tracking here -- do not generate any results. + * + * On closeOp, generateFullOuterSmallTableNoMatches will generate the no-match Small Table + * results. + */ + + /* + * Single-Column Long specific declarations. + */ + + // The one join column for this specialized class. + LongColumnVector joinColVector = (LongColumnVector) batch.cols[singleJoinColumn]; + long[] vector = joinColVector.vector; + + if (batch.selectedInUse) { + + int[] selected = batch.selected; + final int logicalSize = batch.size; + for (int logicalIndex = 0; logicalIndex < logicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + + hashMap.lookupNoResult(vector[batchIndex], fullOuterIntersectReadPos, matchTracker); + } + } else { + + final int size = batch.size; + for (int batchIndex = 0; batchIndex < size; batchIndex++) { + + hashMap.lookupNoResult(vector[batchIndex], fullOuterIntersectReadPos, matchTracker); + } + } + + } catch (IOException e) { + throw new HiveException(e); + } catch (Exception e) { + throw new HiveException(e); + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectMultiKeyOperator.java new file mode 100644 index 0000000..02a4c18 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectMultiKeyOperator.java @@ -0,0 +1,128 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +// import org.slf4j.Logger; +// import org.slf4j.LoggerFactory; +import java.io.IOException; + +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/* + * Specialized class for doing a vectorized map join that is A FULL outer join on Multi-Key + * using a hash map. + */ +public class VectorMapJoinFullOuterIntersectMultiKeyOperator extends VectorMapJoinOuterMultiKeyOperator { + + private static final long serialVersionUID = 1L; + + //------------------------------------------------------------------------------------------------ + + private static final String CLASS_NAME = VectorMapJoinFullOuterIntersectMultiKeyOperator.class.getName(); + // private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + /** Kryo ctor. */ + protected VectorMapJoinFullOuterIntersectMultiKeyOperator() { + super(); + } + + public VectorMapJoinFullOuterIntersectMultiKeyOperator(CompilationOpContext ctx) { + super(ctx); + } + + public VectorMapJoinFullOuterIntersectMultiKeyOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + fullOuterIntersectHashTableSetup(); + } + + + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { + + try { + + /* + * Only do key matching tracking here -- do not generate any results. + * + * On closeOp, generateFullOuterSmallTableNoMatches will generate the no-match Small Table + * results. + */ + + /* + * Multi-Key specific declarations. + */ + + // None. 
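Every specialized processBatch above repeats the same selected[] indirection. A generic sketch of that iteration pattern, where the KeyTracker callback is an illustrative stand-in for the hashMap.lookupNoResult call:

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class BatchIterationSketch {

  // Hypothetical per-row callback standing in for hashMap.lookupNoResult(...).
  public interface KeyTracker {
    void trackKey(VectorizedRowBatch batch, int batchIndex);
  }

  // The two-way iteration every specialized operator repeats: honor the selected[]
  // indirection when selectedInUse is set, otherwise walk indexes 0..size-1.
  public static void forEachRow(VectorizedRowBatch batch, KeyTracker tracker) {
    if (batch.selectedInUse) {
      int[] selected = batch.selected;
      final int logicalSize = batch.size;
      for (int logicalIndex = 0; logicalIndex < logicalSize; logicalIndex++) {
        tracker.trackKey(batch, selected[logicalIndex]);
      }
    } else {
      final int size = batch.size;
      for (int batchIndex = 0; batchIndex < size; batchIndex++) {
        tracker.trackKey(batch, batchIndex);
      }
    }
  }
}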
+ + if (batch.selectedInUse) { + + int[] selected = batch.selected; + final int logicalSize = batch.size; + for (int logicalIndex = 0; logicalIndex < logicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, batchIndex); + byte[] keyBytes = currentKeyOutput.getData(); + int keyLength = currentKeyOutput.getLength(); + hashMap.lookupNoResult( + keyBytes, 0, keyLength, fullOuterIntersectReadPos, matchTracker); + } + } else { + + final int size = batch.size; + for (int batchIndex = 0; batchIndex < size; batchIndex++) { + + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, batchIndex); + byte[] keyBytes = currentKeyOutput.getData(); + int keyLength = currentKeyOutput.getLength(); + hashMap.lookupNoResult( + keyBytes, 0, keyLength, fullOuterIntersectReadPos, matchTracker); + } + } + + } catch (IOException e) { + throw new HiveException(e); + } catch (Exception e) { + throw new HiveException(e); + } + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectStringOperator.java new file mode 100644 index 0000000..4ec6e7c --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectStringOperator.java @@ -0,0 +1,131 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +// import org.slf4j.Logger; +// import org.slf4j.LoggerFactory; +import java.io.IOException; + +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/* + * Specialized class for doing a vectorized map join that is an outer join on a Single-Column String + * using a hash map. 
+ */ +public class VectorMapJoinFullOuterIntersectStringOperator extends VectorMapJoinOuterStringOperator { + + private static final long serialVersionUID = 1L; + + //------------------------------------------------------------------------------------------------ + + private static final String CLASS_NAME = VectorMapJoinOuterStringOperator.class.getName(); + // private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + /** Kryo ctor. */ + protected VectorMapJoinFullOuterIntersectStringOperator() { + super(); + } + + public VectorMapJoinFullOuterIntersectStringOperator(CompilationOpContext ctx) { + super(ctx); + } + + public VectorMapJoinFullOuterIntersectStringOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + fullOuterIntersectHashTableSetup(); + } + + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { + + try { + + /* + * Only do key matching tracking here -- do not generate any results. + * + * On closeOp, generateFullOuterSmallTableNoMatches will generate the no-match Small Table + * results. + */ + + /* + * Single-Column String specific declarations. + */ + + // The one join column for this specialized class. + BytesColumnVector joinColVector = (BytesColumnVector) batch.cols[singleJoinColumn]; + byte[][] vector = joinColVector.vector; + int[] start = joinColVector.start; + int[] length = joinColVector.length; + + if (batch.selectedInUse) { + + int[] selected = batch.selected; + final int logicalSize = batch.size; + for (int logicalIndex = 0; logicalIndex < logicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + + byte[] keyBytes = vector[batchIndex]; + int keyStart = start[batchIndex]; + int keyLength = length[batchIndex]; + + hashMap.lookupNoResult( + keyBytes, keyStart, keyLength, fullOuterIntersectReadPos, matchTracker); + } + } else { + + final int size = batch.size; + for (int batchIndex = 0; batchIndex < size; batchIndex++) { + + byte[] keyBytes = vector[batchIndex]; + int keyStart = start[batchIndex]; + int keyLength = length[batchIndex]; + + hashMap.lookupNoResult( + keyBytes, keyStart, keyLength, fullOuterIntersectReadPos, matchTracker); + } + } + } catch (IOException e) { + throw new HiveException(e); + } catch (Exception e) { + throw new HiveException(e); + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterLongOperator.java new file mode 100644 index 0000000..4267f8a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterLongOperator.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +// import org.slf4j.Logger; +// import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/* + * Specialized class for doing a vectorized map join that is a full outer join on a Single-Column + * Long using a hash map. + */ +public class VectorMapJoinFullOuterLongOperator extends VectorMapJoinOuterLongOperator { + private static final long serialVersionUID = 1L; + + //------------------------------------------------------------------------------------------------ + + private static final String CLASS_NAME = VectorMapJoinFullOuterLongOperator.class.getName(); + // private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + /** Kryo ctor. */ + protected VectorMapJoinFullOuterLongOperator() { + super(); + } + + public VectorMapJoinFullOuterLongOperator(CompilationOpContext ctx) { + super(ctx); + } + + public VectorMapJoinFullOuterLongOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + // Turn on key matching, and for the shared Small-Table case, setup forwarding to Intersect. + fullOuterHashTableSetup(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterMultiKeyOperator.java new file mode 100644 index 0000000..ff119e5 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterMultiKeyOperator.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +// import org.slf4j.Logger; +// import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/* + * Specialized class for doing a vectorized map join that is A FULL outer join on Multi-Key + * using a hash map. + */ +public class VectorMapJoinFullOuterMultiKeyOperator extends VectorMapJoinOuterMultiKeyOperator { + + private static final long serialVersionUID = 1L; + + //------------------------------------------------------------------------------------------------ + + private static final String CLASS_NAME = VectorMapJoinFullOuterMultiKeyOperator.class.getName(); + // private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + /** Kryo ctor. */ + protected VectorMapJoinFullOuterMultiKeyOperator() { + super(); + } + + public VectorMapJoinFullOuterMultiKeyOperator(CompilationOpContext ctx) { + super(ctx); + } + + public VectorMapJoinFullOuterMultiKeyOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + // Turn on key matching, and for the shared Small-Table case, setup forwarding to Intersect. + fullOuterHashTableSetup(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterStringOperator.java new file mode 100644 index 0000000..9ab4bf2 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterStringOperator.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
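The FULL OUTER flow splits into a probe-time pass that only records which Small Table keys were matched and a close-time pass (generateFullOuterSmallTableNoMatches) that emits the never-matched Small Table rows with the Big Table side NULL. A toy, non-Hive illustration of that bookkeeping, assuming nothing about the real MatchTracker or hash table APIs:

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class FullOuterMatchTrackingSketch<K, V> {

  private final Map<K, V> smallTable = new HashMap<>();
  private final Set<K> matchedKeys = new HashSet<>();

  public void put(K key, V value) {
    smallTable.put(key, value);
  }

  // Probe from the Big Table side; remember that this key was matched.
  public V lookup(K key) {
    V value = smallTable.get(key);
    if (value != null) {
      matchedKeys.add(key);
    }
    return value;
  }

  // At operator close: every never-matched Small Table key still owes a result row
  // with the Big Table side NULL.
  public Map<K, V> unmatchedSmallTableEntries() {
    Map<K, V> result = new HashMap<>(smallTable);
    result.keySet().removeAll(matchedKeys);
    return result;
  }
}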
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +// import org.slf4j.Logger; +// import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/* + * Specialized class for doing a vectorized map join that is an outer join on a Single-Column String + * using a hash map. + */ +public class VectorMapJoinFullOuterStringOperator extends VectorMapJoinOuterStringOperator { + + private static final long serialVersionUID = 1L; + + //------------------------------------------------------------------------------------------------ + + private static final String CLASS_NAME = VectorMapJoinOuterStringOperator.class.getName(); + // private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + /** Kryo ctor. */ + protected VectorMapJoinFullOuterStringOperator() { + super(); + } + + public VectorMapJoinFullOuterStringOperator(CompilationOpContext ctx) { + super(ctx); + } + + public VectorMapJoinFullOuterStringOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + // Turn on key matching, and for the shared Small-Table case, setup forwarding to Intersect. + fullOuterHashTableSetup(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java index 92ec1ee..32cb1cb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java @@ -93,9 +93,6 @@ private transient Thread ownThread; private transient int interruptCheckCounter = CHECK_INTERRUPT_PER_OVERFLOW_BATCHES; - // Debug display. - protected transient long batchCounter; - /** Kryo ctor. 
*/ protected VectorMapJoinGenerateResultOperator() { super(); @@ -124,13 +121,6 @@ private void setUpInterruptChecking() { ownThread = Thread.currentThread(); } - protected void commonSetup(VectorizedRowBatch batch) throws HiveException { - super.commonSetup(batch); - - batchCounter = 0; - - } - //------------------------------------------------------------------------------------------------ protected void performValueExpressions(VectorizedRowBatch batch, @@ -157,24 +147,24 @@ protected void performValueExpressions(VectorizedRowBatch batch, batch.selectedInUse = saveSelectedInUse; } - protected void doSmallTableDeserializeRow(VectorizedRowBatch batch, int batchIndex, + protected void doSmallTableValueDeserializeRow(VectorizedRowBatch batch, int batchIndex, ByteSegmentRef byteSegmentRef, VectorMapJoinHashMapResult hashMapResult) throws HiveException { byte[] bytes = byteSegmentRef.getBytes(); int offset = (int) byteSegmentRef.getOffset(); int length = byteSegmentRef.getLength(); - smallTableVectorDeserializeRow.setBytes(bytes, offset, length); + smallTableValueVectorDeserializeRow.setBytes(bytes, offset, length); try { // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc. - smallTableVectorDeserializeRow.deserializeByRef(batch, batchIndex); + smallTableValueVectorDeserializeRow.deserializeByRef(batch, batchIndex); } catch (Exception e) { throw new HiveException( "\nHashMapResult detail: " + hashMapResult.getDetailedHashMapResultPositionString() + "\nDeserializeRead detail: " + - smallTableVectorDeserializeRow.getDetailedReadPositionString(), + smallTableValueVectorDeserializeRow.getDetailedReadPositionString(), e); } } @@ -215,22 +205,23 @@ protected int generateHashMapResultSingleValue(VectorizedRowBatch batch, for (int i = 0; i < duplicateCount; i++) { - int batchIndex = allMatchs[allMatchesIndex + i]; + final int batchIndex = allMatchs[allMatchesIndex + i]; - // Outer key copying is only used when we are using the input BigTable batch as the output. - // - if (bigTableVectorCopyOuterKeys != null) { - // Copy within row. - bigTableVectorCopyOuterKeys.copyByReference(batch, batchIndex, batch, batchIndex); + if (outerSmallTableKeyVectorCopy != null) { + + // For [FULL] OUTER MapJoin, copy Big Table keys to Small Table area within + // same batch by reference. + // + outerSmallTableKeyVectorCopy.copyByReference( + batch, batchIndex, + batch, batchIndex); } - if (smallTableVectorDeserializeRow != null) { - doSmallTableDeserializeRow(batch, batchIndex, + if (smallTableValueVectorDeserializeRow != null) { + doSmallTableValueDeserializeRow(batch, batchIndex, byteSegmentRef, hashMapResult); } - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, "generateHashMapResultSingleValue big table"); - // Use the big table row as output. batch.selected[numSel++] = batchIndex; } @@ -273,26 +264,45 @@ protected void generateHashMapResultMultiValue(VectorizedRowBatch batch, for (int i = 0; i < duplicateCount; i++) { - int batchIndex = allMatchs[allMatchesIndex + i]; + final int batchIndex = allMatchs[allMatchesIndex + i]; ByteSegmentRef byteSegmentRef = hashMapResult.first(); while (byteSegmentRef != null) { // Copy the BigTable values into the overflow batch. Since the overflow batch may // not get flushed here, we must copy by value. - // Note this includes any outer join keys that need to go into the small table "area". 
+ // if (bigTableRetainedVectorCopy != null) { - bigTableRetainedVectorCopy.copyByValue(batch, batchIndex, - overflowBatch, overflowBatch.size); + bigTableRetainedVectorCopy.copyByValue( + batch, batchIndex, + overflowBatch, overflowBatch.size); } - if (smallTableVectorDeserializeRow != null) { + if (nonOuterSmallTableKeyVectorCopy != null) { - doSmallTableDeserializeRow(overflowBatch, overflowBatch.size, - byteSegmentRef, hashMapResult); + // For non-[FULL] OUTER MapJoin, copy non-retained Big Table keys to the Big Table area + // across to overflow batch by value so Small Key projection will see its keys... + // + nonOuterSmallTableKeyVectorCopy.copyByValue( + batch, batchIndex, + overflowBatch, overflowBatch.size); + } + + if (outerSmallTableKeyVectorCopy != null) { + + // For [FULL] OUTER MapJoin, copy Big Table keys to Small Table area across + // to overflow batch by value. + // + outerSmallTableKeyVectorCopy.copyByValue( + batch, batchIndex, + overflowBatch, overflowBatch.size); } - // VectorizedBatchUtil.debugDisplayOneRow(overflowBatch, overflowBatch.size, "generateHashMapResultMultiValue overflow"); + if (smallTableValueVectorDeserializeRow != null) { + + doSmallTableValueDeserializeRow(overflowBatch, overflowBatch.size, + byteSegmentRef, hashMapResult); + } overflowBatch.size++; if (overflowBatch.size == overflowBatch.DEFAULT_SIZE) { @@ -333,8 +343,8 @@ private void generateHashMapResultLargeMultiValue(VectorizedRowBatch batch, // Fill up as much of the overflow batch as possible with small table values. while (byteSegmentRef != null) { - if (smallTableVectorDeserializeRow != null) { - doSmallTableDeserializeRow(overflowBatch, overflowBatch.size, + if (smallTableValueVectorDeserializeRow != null) { + doSmallTableValueDeserializeRow(overflowBatch, overflowBatch.size, byteSegmentRef, hashMapResult); } @@ -361,9 +371,40 @@ private void generateHashMapResultLargeMultiValue(VectorizedRowBatch batch, int batchIndex = allMatchs[allMatchesIndex + i]; if (bigTableRetainedVectorCopy != null) { + // The one big table row's values repeat. - bigTableRetainedVectorCopy.copyByReference(batch, batchIndex, overflowBatch, 0); - for (int column : bigTableRetainedMapping.getOutputColumns()) { + bigTableRetainedVectorCopy.copyByReference( + batch, batchIndex, + overflowBatch, 0); + for (int column : bigTableRetainColumnMap) { + overflowBatch.cols[column].isRepeating = true; + } + } + + if (nonOuterSmallTableKeyVectorCopy != null) { + + // For non-[FULL] OUTER MapJoin, copy non-retained Big Table keys to the Big Table area + // across to overflow batch by value so Small Key projection will see its keys... + // + nonOuterSmallTableKeyVectorCopy.copyByValue( + batch, batchIndex, + overflowBatch, 0); + for (int column : nonOuterSmallTableKeyColumnMap) { + overflowBatch.cols[column].isRepeating = true; + } + } + + int[] outerSmallTableKeyColumnMap = null; + if (outerSmallTableKeyVectorCopy != null) { + + // For [FULL] OUTER MapJoin, copy Big Table keys to Small Table area within + // to overflow batch by value. + // + outerSmallTableKeyVectorCopy.copyByValue( + batch, batchIndex, + overflowBatch, 0); + outerSmallTableKeyColumnMap = outerSmallTableKeyMapping.getOutputColumns(); + for (int column : outerSmallTableKeyColumnMap) { overflowBatch.cols[column].isRepeating = true; } } @@ -373,10 +414,20 @@ private void generateHashMapResultLargeMultiValue(VectorizedRowBatch batch, forwardOverflowNoReset(); // Hand reset the big table columns. 
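The large multi-value path above relies on a repeating-column trick: the one Big Table row's values are placed in the overflow batch once and marked isRepeating, the batch is forwarded without reset, and those columns are then hand reset before reuse. A small sketch of that idea against VectorizedRowBatch (the helper class and method names are illustrative):

import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class RepeatingBigTableSideSketch {

  // When a single Big Table row matches many Small Table values, the Big Table
  // output columns of the overflow batch hold just that one row's values and are
  // marked isRepeating, so only the Small Table columns change per output row.
  public static void markBigTableSideRepeating(
      VectorizedRowBatch overflowBatch, int[] bigTableRetainColumnMap) {
    for (int columnNum : bigTableRetainColumnMap) {
      overflowBatch.cols[columnNum].isRepeating = true;
    }
  }

  // After the overflow batch is forwarded without reset, the repeating columns are
  // hand reset before being reused for the next key's results.
  public static void resetBigTableSide(
      VectorizedRowBatch overflowBatch, int[] bigTableRetainColumnMap) {
    for (int columnNum : bigTableRetainColumnMap) {
      ColumnVector colVector = overflowBatch.cols[columnNum];
      colVector.reset();
    }
  }
}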
- for (int column : bigTableRetainedMapping.getOutputColumns()) { + for (int column : bigTableRetainColumnMap) { + ColumnVector colVector = overflowBatch.cols[column]; + colVector.reset(); + } + for (int column : nonOuterSmallTableKeyColumnMap) { ColumnVector colVector = overflowBatch.cols[column]; colVector.reset(); } + if (outerSmallTableKeyColumnMap != null) { + for (int column : outerSmallTableKeyColumnMap) { + ColumnVector colVector = overflowBatch.cols[column]; + colVector.reset(); + } + } } byteSegmentRef = hashMapResult.next(); @@ -485,13 +536,9 @@ private void spillSerializeRow(VectorizedRowBatch batch, int batchIndex, VectorRowBytesContainer rowBytesContainer = hp.getMatchfileRowBytesContainer(); Output output = rowBytesContainer.getOuputForRowBytes(); -// int offset = output.getLength(); bigTableVectorSerializeRow.setOutputAppend(output); bigTableVectorSerializeRow.serializeWrite(batch, batchIndex); -// int length = output.getLength() - offset; rowBytesContainer.finishRow(); - -// LOG.debug("spillSerializeRow spilled batchIndex " + batchIndex + ", length " + length); } protected void spillHashMapBatch(VectorizedRowBatch batch, @@ -637,7 +684,7 @@ public void forwardBigTableBatch(VectorizedRowBatch batch) throws HiveException batch.projectionSize = outputProjection.length; batch.projectedColumns = outputProjection; - forward(batch, null, true); + vectorForward(batch); // Revert the projected columns back, because batch can be re-used by our parent operators. batch.projectionSize = originalProjectionSize; @@ -649,7 +696,7 @@ public void forwardBigTableBatch(VectorizedRowBatch batch) throws HiveException * Forward the overflow batch and reset the batch. */ protected void forwardOverflow() throws HiveException { - forward(overflowBatch, null, true); + vectorForward(overflowBatch); overflowBatch.reset(); maybeCheckInterrupt(); } @@ -666,7 +713,7 @@ private void maybeCheckInterrupt() throws HiveException { * Forward the overflow batch, but do not reset the batch. */ private void forwardOverflowNoReset() throws HiveException { - forward(overflowBatch, null, true); + vectorForward(overflowBatch); } /* @@ -679,6 +726,11 @@ private void forwardOverflowNoReset() throws HiveException { @Override public void closeOp(boolean aborted) throws HiveException { super.closeOp(aborted); + + // NOTE: The closeOp call on super MapJoinOperator can trigger Hybrid Grace additional + // NOTE: processing and also FULL OUTER MapJoin non-match Small Table result generation. So, + // NOTE: we flush the overflowBatch after the call. + // if (!aborted && overflowBatch.size > 0) { forwardOverflow(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java index f791d95..35ddddd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java @@ -103,25 +103,25 @@ public VectorMapJoinInnerBigOnlyGenerateResultOperator(CompilationOpContext ctx, /* * Setup our inner big table only join specific members. */ - protected void commonSetup(VectorizedRowBatch batch) throws HiveException { - super.commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); // Inner big-table only join specific. 
VectorMapJoinHashMultiSet baseHashMultiSet = (VectorMapJoinHashMultiSet) vectorMapJoinHashTable; - hashMultiSetResults = new VectorMapJoinHashMultiSetResult[batch.DEFAULT_SIZE]; + hashMultiSetResults = new VectorMapJoinHashMultiSetResult[VectorizedRowBatch.DEFAULT_SIZE]; for (int i = 0; i < hashMultiSetResults.length; i++) { hashMultiSetResults[i] = baseHashMultiSet.createHashMultiSetResult(); } - allMatchs = new int[batch.DEFAULT_SIZE]; + allMatchs = new int[VectorizedRowBatch.DEFAULT_SIZE]; - equalKeySeriesValueCounts = new long[batch.DEFAULT_SIZE]; - equalKeySeriesAllMatchIndices = new int[batch.DEFAULT_SIZE]; - equalKeySeriesDuplicateCounts = new int[batch.DEFAULT_SIZE]; + equalKeySeriesValueCounts = new long[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesAllMatchIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesDuplicateCounts = new int[VectorizedRowBatch.DEFAULT_SIZE]; - spills = new int[batch.DEFAULT_SIZE]; - spillHashMapResultIndices = new int[batch.DEFAULT_SIZE]; + spills = new int[VectorizedRowBatch.DEFAULT_SIZE]; + spillHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; } //----------------------------------------------------------------------------------------------- diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java index 678fa42..30a19b8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java @@ -102,45 +102,36 @@ public VectorMapJoinInnerBigOnlyLongOperator(CompilationOpContext ctx, OperatorD // @Override - public void process(Object row, int tag) throws HiveException { + protected void commonSetup() throws HiveException { + super.commonSetup(); - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); - - /* - * Initialize Single-Column Long members for this specialized class. - */ + /* + * Initialize Single-Column Long members for this specialized class. + */ - singleJoinColumn = bigTableKeyColumnMap[0]; - - needCommonSetup = false; - } - - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. - - /* - * Get our Single-Column Long hash multi-set information for this specialized class. - */ + singleJoinColumn = bigTableKeyColumnMap[0]; + } - hashMultiSet = (VectorMapJoinLongHashMultiSet) vectorMapJoinHashTable; - useMinMax = hashMultiSet.useMinMax(); - if (useMinMax) { - min = hashMultiSet.min(); - max = hashMultiSet.max(); - } + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + /* + * Get our Single-Column Long hash multi-set information for this specialized class. + */ + + hashMultiSet = (VectorMapJoinLongHashMultiSet) vectorMapJoinHashTable; + useMinMax = hashMultiSet.useMinMax(); + if (useMinMax) { + min = hashMultiSet.min(); + max = hashMultiSet.max(); + } + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an inner big-only join. 
@@ -153,11 +144,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java index 866aa60..339c343 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java @@ -31,11 +31,11 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Multi-Key hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMultiSet; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMultiSet; // Multi-Key specific imports. import org.apache.hadoop.hive.ql.exec.vector.VectorSerializeRow; import org.apache.hadoop.hive.serde2.ByteStream.Output; @@ -109,45 +109,40 @@ public VectorMapJoinInnerBigOnlyMultiKeyOperator(CompilationOpContext ctx, Opera // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; + protected void commonSetup() throws HiveException { + super.commonSetup(); - alias = (byte) tag; + /* + * Initialize Multi-Key members for this specialized class. + */ - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); - - /* - * Initialize Multi-Key members for this specialized class. - */ + keyVectorSerializeWrite = new VectorSerializeRow( + new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); - keyVectorSerializeWrite = new VectorSerializeRow( - new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); + currentKeyOutput = new Output(); + saveKeyOutput = new Output(); + } - currentKeyOutput = new Output(); - saveKeyOutput = new Output(); + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - needCommonSetup = false; - } + /* + * Get our Single-Column Long hash multi-set information for this specialized class. + */ - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + /* + * Get our Multi-Key hash multi-set information for this specialized class. + */ - /* - * Get our Multi-Key hash multi-set information for this specialized class. - */ - - hashMultiSet = (VectorMapJoinBytesHashMultiSet) vectorMapJoinHashTable; + hashMultiSet = (VectorMapJoinBytesHashMultiSet) vectorMapJoinHashTable; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an inner big-only join. 
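Each specialized operator above gets the same refactor: the old monolithic process(Object, int) is split into commonSetup() (one-time members), hashTableSetup() (bind the hash table, redone after a Hybrid Grace reload), and processBatch() (per-batch work). The shared driver that calls these hooks lives in the common vector MapJoin base class, which is not part of these hunks, so treat the following editor's sketch of its shape as an assumption reconstructed from the removed code.

  // Editor's sketch (assumed shape) of the shared driver the specialized operators plug into.
  @Override
  public void process(Object row, int tag) throws HiveException {
    VectorizedRowBatch batch = (VectorizedRowBatch) row;
    alias = (byte) tag;

    if (needCommonSetup) {
      // One-time member initialization (scratch arrays, key serializers, ...).
      commonSetup();
      needCommonSetup = false;
    }
    if (needHashTableSetup) {
      // Bind the specialized hash table; repeated after a Hybrid Grace reload.
      hashTableSetup();
      needHashTableSetup = false;
    }

    batchCounter++;
    if (batch.size == 0) {
      return;  // empty batches are now skipped centrally, not in each subclass
    }
    processBatch(batch);
  }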
@@ -160,11 +155,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java index a0c3b9c..e373db1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column String hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMultiSet; @@ -98,40 +97,31 @@ public VectorMapJoinInnerBigOnlyStringOperator(CompilationOpContext ctx, Operato // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); - /* - * Initialize Single-Column String members for this specialized class. - */ - - singleJoinColumn = bigTableKeyColumnMap[0]; + /* + * Initialize Single-Column String members for this specialized class. + */ - needCommonSetup = false; - } + singleJoinColumn = bigTableKeyColumnMap[0]; + } - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - /* - * Get our Single-Column String hash multi-set information for this specialized class. - */ + /* + * Get our Single-Column String hash multi-set information for this specialized class. + */ - hashMultiSet = (VectorMapJoinBytesHashMultiSet) vectorMapJoinHashTable; + hashMultiSet = (VectorMapJoinBytesHashMultiSet) vectorMapJoinHashTable; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an inner big-only join. 
@@ -144,11 +134,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java index ea2c04d..dc5d046 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java @@ -108,26 +108,26 @@ public VectorMapJoinInnerGenerateResultOperator(CompilationOpContext ctx, Operat /* * Setup our inner join specific members. */ - protected void commonSetup(VectorizedRowBatch batch) throws HiveException { - super.commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); // Inner join specific. VectorMapJoinHashMap baseHashMap = (VectorMapJoinHashMap) vectorMapJoinHashTable; - hashMapResults = new VectorMapJoinHashMapResult[batch.DEFAULT_SIZE]; + hashMapResults = new VectorMapJoinHashMapResult[VectorizedRowBatch.DEFAULT_SIZE]; for (int i = 0; i < hashMapResults.length; i++) { hashMapResults[i] = baseHashMap.createHashMapResult(); } - allMatchs = new int[batch.DEFAULT_SIZE]; + allMatchs = new int[VectorizedRowBatch.DEFAULT_SIZE]; - equalKeySeriesHashMapResultIndices = new int[batch.DEFAULT_SIZE]; - equalKeySeriesAllMatchIndices = new int[batch.DEFAULT_SIZE]; - equalKeySeriesIsSingleValue = new boolean[batch.DEFAULT_SIZE]; - equalKeySeriesDuplicateCounts = new int[batch.DEFAULT_SIZE]; + equalKeySeriesHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesAllMatchIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesIsSingleValue = new boolean[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesDuplicateCounts = new int[VectorizedRowBatch.DEFAULT_SIZE]; - spills = new int[batch.DEFAULT_SIZE]; - spillHashMapResultIndices = new int[batch.DEFAULT_SIZE]; + spills = new int[VectorizedRowBatch.DEFAULT_SIZE]; + spillHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; } /* @@ -142,7 +142,7 @@ protected void innerPerBatchSetup(VectorizedRowBatch batch) { // For join operators that can generate small table results, reset their // (target) scratch columns. - for (int column : smallTableOutputVectorColumns) { + for (int column : smallTableValueColumnMap) { ColumnVector smallTableColumn = batch.cols[column]; smallTableColumn.reset(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java index 36404bc..5ac606a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java @@ -30,7 +30,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column Long hash table import. 
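The switch from batch.DEFAULT_SIZE to VectorizedRowBatch.DEFAULT_SIZE is what lets commonSetup() drop its batch parameter: DEFAULT_SIZE is a static constant (the maximum rows per batch), so the per-row scratch arrays can be allocated before the first batch arrives. A minimal sketch of the allocation pattern, using only members that appear in the hunks above:

  // Editor's sketch: scratch state sized to the static maximum batch size, so no
  // VectorizedRowBatch instance is needed at setup time; the arrays are reused per batch.
  protected void commonSetup() throws HiveException {
    super.commonSetup();

    allMatchs = new int[VectorizedRowBatch.DEFAULT_SIZE];
    spills = new int[VectorizedRowBatch.DEFAULT_SIZE];
    spillHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE];
  }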
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap; @@ -101,45 +100,36 @@ public VectorMapJoinInnerLongOperator(CompilationOpContext ctx, OperatorDesc con // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); - - /* - * Initialize Single-Column Long members for this specialized class. - */ - - singleJoinColumn = bigTableKeyColumnMap[0]; + protected void commonSetup() throws HiveException { + super.commonSetup(); - needCommonSetup = false; - } - - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + /* + * Initialize Single-Column Long members for this specialized class. + */ - /* - * Get our Single-Column Long hash map information for this specialized class. - */ + singleJoinColumn = bigTableKeyColumnMap[0]; + } - hashMap = (VectorMapJoinLongHashMap) vectorMapJoinHashTable; - useMinMax = hashMap.useMinMax(); - if (useMinMax) { - min = hashMap.min(); - max = hashMap.max(); - } + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + /* + * Get our Single-Column Long hash map information for this specialized class. + */ + + hashMap = (VectorMapJoinLongHashMap) vectorMapJoinHashTable; + useMinMax = hashMap.useMinMax(); + if (useMinMax) { + min = hashMap.min(); + max = hashMap.max(); + } + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an inner join. @@ -151,11 +141,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java index 620101f..cdee3fd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java @@ -30,7 +30,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Multi-Key hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; @@ -107,45 +106,36 @@ public VectorMapJoinInnerMultiKeyOperator(CompilationOpContext ctx, OperatorDesc // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); - /* - * Initialize Multi-Key members for this specialized class. 
- */ - - keyVectorSerializeWrite = new VectorSerializeRow( - new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); + /* + * Initialize Multi-Key members for this specialized class. + */ - currentKeyOutput = new Output(); - saveKeyOutput = new Output(); + keyVectorSerializeWrite = new VectorSerializeRow( + new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); - needCommonSetup = false; - } + currentKeyOutput = new Output(); + saveKeyOutput = new Output(); + } - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - /* - * Get our Multi-Key hash map information for this specialized class. - */ + /* + * Get our Multi-Key hash map information for this specialized class. + */ - hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an inner join. @@ -157,11 +147,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java index d99d514..8e6697e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java @@ -30,7 +30,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column String hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; @@ -97,40 +96,31 @@ public VectorMapJoinInnerStringOperator(CompilationOpContext ctx, OperatorDesc c // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); - /* - * Initialize Single-Column String members for this specialized class. - */ - - singleJoinColumn = bigTableKeyColumnMap[0]; + /* + * Initialize Single-Column String members for this specialized class. + */ - needCommonSetup = false; - } + singleJoinColumn = bigTableKeyColumnMap[0]; + } - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - /* - * Get our Single-Column String hash map information for this specialized class. 
- */ + /* + * Get our Single-Column String hash map information for this specialized class. + */ - hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an inner join. @@ -142,11 +132,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java index f68d4c4..71ec56b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java @@ -89,21 +89,21 @@ public VectorMapJoinLeftSemiGenerateResultOperator(CompilationOpContext ctx, Ope /* * Setup our left semi join specific members. */ - protected void commonSetup(VectorizedRowBatch batch) throws HiveException { - super.commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); // Semi join specific. VectorMapJoinHashSet baseHashSet = (VectorMapJoinHashSet) vectorMapJoinHashTable; - hashSetResults = new VectorMapJoinHashSetResult[batch.DEFAULT_SIZE]; + hashSetResults = new VectorMapJoinHashSetResult[VectorizedRowBatch.DEFAULT_SIZE]; for (int i = 0; i < hashSetResults.length; i++) { hashSetResults[i] = baseHashSet.createHashSetResult(); } - allMatchs = new int[batch.DEFAULT_SIZE]; + allMatchs = new int[VectorizedRowBatch.DEFAULT_SIZE]; - spills = new int[batch.DEFAULT_SIZE]; - spillHashMapResultIndices = new int[batch.DEFAULT_SIZE]; + spills = new int[VectorizedRowBatch.DEFAULT_SIZE]; + spillHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; } //----------------------------------------------------------------------------------------------- diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java index 4185c5b..40e7cfa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column Long hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashSet; @@ -102,45 +101,36 @@ public VectorMapJoinLeftSemiLongOperator(CompilationOpContext ctx, OperatorDesc // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. 
- commonSetup(batch); - - /* - * Initialize Single-Column Long members for this specialized class. - */ - - singleJoinColumn = bigTableKeyColumnMap[0]; + protected void commonSetup() throws HiveException { + super.commonSetup(); - needCommonSetup = false; - } - - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + /* + * Initialize Single-Column Long members for this specialized class. + */ - /* - * Get our Single-Column Long hash set information for this specialized class. - */ + singleJoinColumn = bigTableKeyColumnMap[0]; + } - hashSet = (VectorMapJoinLongHashSet) vectorMapJoinHashTable; - useMinMax = hashSet.useMinMax(); - if (useMinMax) { - min = hashSet.min(); - max = hashSet.max(); - } + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + /* + * Get our Single-Column Long hash set information for this specialized class. + */ + + hashSet = (VectorMapJoinLongHashSet) vectorMapJoinHashTable; + useMinMax = hashSet.useMinMax(); + if (useMinMax) { + min = hashSet.min(); + max = hashSet.max(); + } + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an left semi join. @@ -153,11 +143,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java index 541e7fa..e5d9fda 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Multi-Key hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashSet; @@ -108,45 +107,36 @@ public VectorMapJoinLeftSemiMultiKeyOperator(CompilationOpContext ctx, OperatorD // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); - /* - * Initialize Multi-Key members for this specialized class. - */ - - keyVectorSerializeWrite = new VectorSerializeRow( - new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); + /* + * Initialize Multi-Key members for this specialized class. 
+ */ - currentKeyOutput = new Output(); - saveKeyOutput = new Output(); + keyVectorSerializeWrite = new VectorSerializeRow( + new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); - needCommonSetup = false; - } + currentKeyOutput = new Output(); + saveKeyOutput = new Output(); + } - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - /* - * Get our Multi-Key hash set information for this specialized class. - */ + /* + * Get our Multi-Key hash set information for this specialized class. + */ - hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable; + hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an left semi join. @@ -159,11 +149,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java index 6785bce..df900a1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column String hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashSet; @@ -98,40 +97,31 @@ public VectorMapJoinLeftSemiStringOperator(CompilationOpContext ctx, OperatorDes // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); - /* - * Initialize Single-Column String members for this specialized class. - */ - - singleJoinColumn = bigTableKeyColumnMap[0]; + /* + * Initialize Single-Column String members for this specialized class. + */ - needCommonSetup = false; - } + singleJoinColumn = bigTableKeyColumnMap[0]; + } - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - /* - * Get our Single-Column String hash set information for this specialized class. - */ + /* + * Get our Single-Column String hash set information for this specialized class. 
+ */ - hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable; + hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an left semi join. @@ -144,11 +134,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java index 8a6c817..05a5aff 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java @@ -24,16 +24,26 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.WriteBuffers; import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; /** @@ -114,6 +124,18 @@ protected transient int[] noMatchs; protected transient int[] merged; + /* + * Small Table key match tracking used for FULL OUTER MapJoin. Otherwise, null. + * Since the Small Table hash table can be shared, we need this non-shared private object for + * our key match tracking. + */ + protected MatchTracker matchTracker; + + protected transient boolean isFullOuterForwardKeysToIntersect; + protected transient WriteBuffers.Position fullOuterIntersectReadPos; + + protected transient int[] fullOuterForwardKeys; + /** Kryo ctor. */ protected VectorMapJoinOuterGenerateResultOperator() { super(); @@ -131,32 +153,37 @@ public VectorMapJoinOuterGenerateResultOperator(CompilationOpContext ctx, Operat /* * Setup our outer join specific members. 
*/ - protected void commonSetup(VectorizedRowBatch batch) throws HiveException { - super.commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); // Outer join specific. VectorMapJoinHashMap baseHashMap = (VectorMapJoinHashMap) vectorMapJoinHashTable; - hashMapResults = new VectorMapJoinHashMapResult[batch.DEFAULT_SIZE]; + hashMapResults = new VectorMapJoinHashMapResult[VectorizedRowBatch.DEFAULT_SIZE]; for (int i = 0; i < hashMapResults.length; i++) { hashMapResults[i] = baseHashMap.createHashMapResult(); } - inputSelected = new int[batch.DEFAULT_SIZE]; + inputSelected = new int[VectorizedRowBatch.DEFAULT_SIZE]; + + allMatchs = new int[VectorizedRowBatch.DEFAULT_SIZE]; - allMatchs = new int[batch.DEFAULT_SIZE]; + equalKeySeriesHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesAllMatchIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesIsSingleValue = new boolean[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesDuplicateCounts = new int[VectorizedRowBatch.DEFAULT_SIZE]; - equalKeySeriesHashMapResultIndices = new int[batch.DEFAULT_SIZE]; - equalKeySeriesAllMatchIndices = new int[batch.DEFAULT_SIZE]; - equalKeySeriesIsSingleValue = new boolean[batch.DEFAULT_SIZE]; - equalKeySeriesDuplicateCounts = new int[batch.DEFAULT_SIZE]; + spills = new int[VectorizedRowBatch.DEFAULT_SIZE]; + spillHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; - spills = new int[batch.DEFAULT_SIZE]; - spillHashMapResultIndices = new int[batch.DEFAULT_SIZE]; + nonSpills = new int[VectorizedRowBatch.DEFAULT_SIZE]; + noMatchs = new int[VectorizedRowBatch.DEFAULT_SIZE]; + merged = new int[VectorizedRowBatch.DEFAULT_SIZE]; - nonSpills = new int[batch.DEFAULT_SIZE]; - noMatchs = new int[batch.DEFAULT_SIZE]; - merged = new int[batch.DEFAULT_SIZE]; + matchTracker = null; + isFullOuterForwardKeysToIntersect = false; + fullOuterIntersectReadPos = null; + fullOuterForwardKeys = null; } @@ -174,15 +201,16 @@ protected void outerPerBatchSetup(VectorizedRowBatch batch) { // For join operators that can generate small table results, reset their // (target) scratch columns. - for (int column : smallTableOutputVectorColumns) { + for (int column : outerSmallTableKeyColumnMap) { + ColumnVector bigTableOuterKeyColumn = batch.cols[column]; + bigTableOuterKeyColumn.reset(); + } + + for (int column : smallTableValueColumnMap) { ColumnVector smallTableColumn = batch.cols[column]; smallTableColumn.reset(); } - for (int column : bigTableOuterKeyOutputVectorColumns) { - ColumnVector bigTableOuterKeyColumn = batch.cols[column]; - bigTableOuterKeyColumn.reset(); - } } /** @@ -576,14 +604,15 @@ protected void generateOuterNulls(VectorizedRowBatch batch, int[] noMatchs, // Mark any scratch small table scratch columns that would normally receive a copy of the // key as null, too. - for (int column : bigTableOuterKeyOutputVectorColumns) { + // + for (int column : outerSmallTableKeyColumnMap) { ColumnVector colVector = batch.cols[column]; colVector.noNulls = false; colVector.isNull[batchIndex] = true; } // Small table values are set to null. 
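Both generateOuterNulls and generateOuterNullsRepeatedAll in this file use the standard ColumnVector NULL protocol: clear noNulls, then set isNull for a single row, or set isNull[0] plus isRepeating when the whole batch is NULL. A small editor's sketch of the two variants, where markColumnsNull is a hypothetical helper and not a method in the patch:

  // Editor's sketch of the ColumnVector NULL protocol used for outer non-matches.
  private void markColumnsNull(VectorizedRowBatch batch, int[] columns,
      int batchIndex, boolean wholeBatchRepeating) {
    for (int column : columns) {
      ColumnVector colVector = batch.cols[column];
      colVector.noNulls = false;
      if (wholeBatchRepeating) {
        colVector.isNull[0] = true;
        colVector.isRepeating = true;   // one NULL entry stands for every row in the batch
      } else {
        colVector.isNull[batchIndex] = true;
      }
    }
  }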
- for (int column : smallTableOutputVectorColumns) { + for (int column : smallTableValueColumnMap) { ColumnVector colVector = batch.cols[column]; colVector.noNulls = false; colVector.isNull[batchIndex] = true; @@ -734,20 +763,345 @@ public void finishOuterRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult jo */ protected void generateOuterNullsRepeatedAll(VectorizedRowBatch batch) throws HiveException { - for (int column : smallTableOutputVectorColumns) { + // Mark any scratch small table scratch columns that would normally receive a copy of the + // key as null, too. + // + for (int column : outerSmallTableKeyColumnMap) { ColumnVector colVector = batch.cols[column]; colVector.noNulls = false; colVector.isNull[0] = true; colVector.isRepeating = true; } - // Mark any scratch small table scratch columns that would normally receive a copy of the key - // as null, too. - for (int column : bigTableOuterKeyOutputVectorColumns) { + for (int column : smallTableValueColumnMap) { ColumnVector colVector = batch.cols[column]; colVector.noNulls = false; colVector.isNull[0] = true; colVector.isRepeating = true; } } + + private void markBigTableColumnsAsNullRepeating() { + + /* + * For non-match FULL OUTER Small Table results, the Big Table columns are all NULL. + */ + for (int column : bigTableRetainColumnMap) { + ColumnVector colVector = overflowBatch.cols[column]; + colVector.isRepeating = true; + colVector.noNulls = false; + colVector.isNull[0] = true; + } + } + + @Override + protected void generateFullOuterSmallTableNoMatches() throws HiveException { + + if (!conf.isDynamicPartitionHashJoin() && !conf.isFullOuterIntersect()) { + + // The FULL OUTER MapJoin INTERSECT operator does the non-match Small Table + // result work. + return; + } + + /* + * For dynamic partition hash join, both the Big Table and Small Table are partitioned (sent) + * to the Reducer using the key hash code. So, we can generate the non-match Small Table + * results locally. + * + * Or, for Intersect, we have been tracking the matched keys received from all the FULL OUTER + * MapJoin operators. So, we can generate the non-match Small Table results in this + * centralized operator. + * + * Scan the Small Table for keys that didn't match and generate the non-matchs into the + * overflowBatch. + */ + + /* + * If there were no matched keys sent, we need to do our common initialization. + */ + if (needCommonSetup) { + + // Our one time process method initialization. + commonSetup(); + + needCommonSetup = false; + } + + if (needHashTableSetup) { + + // Setup our hash table specialization. It will be the first time the process + // method is called, or after a Hybrid Grace reload. + + hashTableSetup(); + + needHashTableSetup = false; + } + + /* + * To support fancy NULL repeating columns, let's flush the overflowBatch if it has anything. 
+ */ + if (overflowBatch.size > 0) { + forwardOverflow(); + } + markBigTableColumnsAsNullRepeating(); + + switch (hashTableKeyType) { + case BOOLEAN: + case BYTE: + case SHORT: + case INT: + case LONG: + generateFullOuterLongKeySmallTableNoMatches(); + break; + case STRING: + generateFullOuterStringKeySmallTableNoMatches(); + break; + case MULTI_KEY: + generateFullOuterMultiKeySmallTableNoMatches(); + break; + default: + throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType); + } + } + + protected void generateFullOuterLongKeySmallTableNoMatches() + throws HiveException { + + final LongColumnVector singleSmallTableKeyOutputColumnVector; + if (allSmallTableKeyColumnIncluded[0]) { + singleSmallTableKeyOutputColumnVector = + (LongColumnVector) overflowBatch.cols[allSmallTableKeyColumnNums[0]]; + } else { + singleSmallTableKeyOutputColumnVector = null; + } + + VectorMapJoinLongHashMap hashMap = (VectorMapJoinLongHashMap) vectorMapJoinHashTable; + + VectorMapJoinNonMatchedIterator nonMatchedIterator = + hashMap.createNonMatchedIterator(matchTracker); + nonMatchedIterator.init(); + while (nonMatchedIterator.findNextNonMatched()) { + + final long longKey; + boolean isKeyNull = !nonMatchedIterator.readNonMatchedLongKey(); + if (!isKeyNull) { + longKey = nonMatchedIterator.getNonMatchedLongKey(); + } else { + longKey = 0; + } + + VectorMapJoinHashMapResult hashMapResult = nonMatchedIterator.getNonMatchedHashMapResult(); + + ByteSegmentRef byteSegmentRef = hashMapResult.first(); + while (byteSegmentRef != null) { + + // NOTE: Big Table result columns were marked repeating NULL already. + + if (singleSmallTableKeyOutputColumnVector != null) { + if (isKeyNull) { + singleSmallTableKeyOutputColumnVector.isNull[overflowBatch.size] = true; + singleSmallTableKeyOutputColumnVector.noNulls = false; + } else { + singleSmallTableKeyOutputColumnVector.vector[overflowBatch.size] = longKey; + singleSmallTableKeyOutputColumnVector.isNull[overflowBatch.size] = false; + } + } + + if (smallTableValueVectorDeserializeRow != null) { + + doSmallTableValueDeserializeRow(overflowBatch, overflowBatch.size, + byteSegmentRef, hashMapResult); + } + + overflowBatch.size++; + if (overflowBatch.size == overflowBatch.DEFAULT_SIZE) { + forwardOverflow(); + markBigTableColumnsAsNullRepeating(); + } + byteSegmentRef = hashMapResult.next(); + } + } + } + + private void doSmallTableKeyDeserializeRow(VectorizedRowBatch batch, int batchIndex, + byte[] keyBytes, int keyOffset, int keyLength) + throws HiveException { + + smallTableKeyOuterVectorDeserializeRow.setBytes(keyBytes, keyOffset, keyLength); + + try { + // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc. 
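The three generateFullOuter*SmallTableNoMatches methods share one shape: iterate the hash-map entries the MatchTracker never saw, leave the Big Table output columns as repeating NULLs, write the Small Table key and values into overflowBatch, and flush whenever it fills. The editor's sketch below condenses that loop in a key-type-agnostic form; the concrete key read/write calls are elided because they differ per specialization, and it assumes the createNonMatchedIterator hook is reachable at this level (the specializations above call it on their concrete hash-map types).

  // Editor's sketch of the shared non-match scan; key-specific column writes are elided.
  VectorMapJoinNonMatchedIterator nonMatchedIterator =
      hashMap.createNonMatchedIterator(matchTracker);   // hashMap: the operator's concrete hash map
  nonMatchedIterator.init();
  while (nonMatchedIterator.findNextNonMatched()) {

    // ... read the never-matched key and write it to the Small Table key output column(s) ...

    VectorMapJoinHashMapResult hashMapResult = nonMatchedIterator.getNonMatchedHashMapResult();
    ByteSegmentRef byteSegmentRef = hashMapResult.first();
    while (byteSegmentRef != null) {

      // Big Table columns were already marked repeating NULL for this batch.
      if (smallTableValueVectorDeserializeRow != null) {
        doSmallTableValueDeserializeRow(overflowBatch, overflowBatch.size,
            byteSegmentRef, hashMapResult);
      }

      overflowBatch.size++;
      if (overflowBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
        forwardOverflow();
        markBigTableColumnsAsNullRepeating();  // re-mark after the flush resets the batch
      }
      byteSegmentRef = hashMapResult.next();
    }
  }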
+ smallTableKeyOuterVectorDeserializeRow.deserializeByRef(batch, batchIndex); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + smallTableKeyOuterVectorDeserializeRow.getDetailedReadPositionString(), + e); + } + } + + protected void generateFullOuterMultiKeySmallTableNoMatches() throws HiveException { + + VectorMapJoinBytesHashMap hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + + VectorMapJoinNonMatchedIterator nonMatchedIterator = + hashMap.createNonMatchedIterator(matchTracker); + nonMatchedIterator.init(); + while (nonMatchedIterator.findNextNonMatched()) { + + nonMatchedIterator.readNonMatchedBytesKey(); + byte[] keyBytes = nonMatchedIterator.getNonMatchedBytes(); + final int keyOffset = nonMatchedIterator.getNonMatchedBytesOffset(); + final int keyLength = nonMatchedIterator.getNonMatchedBytesLength(); + + VectorMapJoinHashMapResult hashMapResult = nonMatchedIterator.getNonMatchedHashMapResult(); + + ByteSegmentRef byteSegmentRef = hashMapResult.first(); + while (byteSegmentRef != null) { + + // NOTE: Big Table result columns were marked repeating NULL already. + + if (smallTableKeyOuterVectorDeserializeRow != null) { + doSmallTableKeyDeserializeRow(overflowBatch, overflowBatch.size, + keyBytes, keyOffset, keyLength); + } + + if (smallTableValueVectorDeserializeRow != null) { + + doSmallTableValueDeserializeRow(overflowBatch, overflowBatch.size, + byteSegmentRef, hashMapResult); + } + + overflowBatch.size++; + if (overflowBatch.size == overflowBatch.DEFAULT_SIZE) { + forwardOverflow(); + markBigTableColumnsAsNullRepeating(); + } + byteSegmentRef = hashMapResult.next(); + } + } + + // NOTE: We don't have to deal with FULL OUTER All-NULL key values like we do for single-column + // LONG and STRING because we do store them in the hash map... + } + + protected void generateFullOuterStringKeySmallTableNoMatches() throws HiveException { + + final BytesColumnVector singleSmallTableKeyOutputColumnVector; + if (allSmallTableKeyColumnIncluded[0]) { + singleSmallTableKeyOutputColumnVector = + (BytesColumnVector) overflowBatch.cols[allSmallTableKeyColumnNums[0]]; + } else { + singleSmallTableKeyOutputColumnVector = null; + } + + VectorMapJoinBytesHashMap hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + + VectorMapJoinNonMatchedIterator nonMatchedIterator = + hashMap.createNonMatchedIterator(matchTracker); + nonMatchedIterator.init(); + while (nonMatchedIterator.findNextNonMatched()) { + + final byte[] keyBytes; + final int keyOffset; + final int keyLength; + boolean isKeyNull = !nonMatchedIterator.readNonMatchedBytesKey(); + if (!isKeyNull) { + keyBytes = nonMatchedIterator.getNonMatchedBytes(); + keyOffset = nonMatchedIterator.getNonMatchedBytesOffset(); + keyLength = nonMatchedIterator.getNonMatchedBytesLength(); + } else { + keyBytes = null; + keyOffset = 0; + keyLength = 0; + } + + VectorMapJoinHashMapResult hashMapResult = nonMatchedIterator.getNonMatchedHashMapResult(); + + ByteSegmentRef byteSegmentRef = hashMapResult.first(); + while (byteSegmentRef != null) { + + // NOTE: Big Table result columns were marked repeating NULL already. 
+ + if (singleSmallTableKeyOutputColumnVector != null) { + if (isKeyNull) { + singleSmallTableKeyOutputColumnVector.isNull[overflowBatch.size] = true; + singleSmallTableKeyOutputColumnVector.noNulls = false; + } else { + singleSmallTableKeyOutputColumnVector.setVal( + overflowBatch.size, + keyBytes, keyOffset, keyLength); + singleSmallTableKeyOutputColumnVector.isNull[overflowBatch.size] = false; + } + } + + if (smallTableValueVectorDeserializeRow != null) { + + doSmallTableValueDeserializeRow(overflowBatch, overflowBatch.size, + byteSegmentRef, hashMapResult); + } + + overflowBatch.size++; + if (overflowBatch.size == overflowBatch.DEFAULT_SIZE) { + forwardOverflow(); + markBigTableColumnsAsNullRepeating(); + } + byteSegmentRef = hashMapResult.next(); + } + } + } + + protected void fullOuterHashTableSetup() { + + // Always track key matches for FULL OUTER. + matchTracker = vectorMapJoinHashTable.createMatchTracker(); + + if (!conf.isDynamicPartitionHashJoin()) { + + // When the Small Table is shared among all Reducers, FULL OUTER MapJoin we must forward + // matched keys to Intersect. + isFullOuterForwardKeysToIntersect = true; + fullOuterForwardKeys = new int[VectorizedRowBatch.DEFAULT_SIZE]; + } + } + + protected void fullOuterIntersectHashTableSetup() { + + matchTracker = vectorMapJoinHashTable.createMatchTracker(); + + fullOuterIntersectReadPos = new WriteBuffers.Position(); + } + + protected void forwardFullOuterKeysToInterset(VectorizedRowBatch batch, + int fullOuterForwardKeyCount) throws HiveException { + + // Save original projection. + int[] originalProjections = batch.projectedColumns; + int originalProjectionSize = batch.projectionSize; + + // Save selected. + int[] originalSelected = batch.selected; + boolean originalSelectedInUse = batch.selectedInUse; + int originalSize = batch.size; + + // Project with the output of our operator. + batch.projectionSize = outputProjection.length; + batch.projectedColumns = outputProjection; + + // Forward just the rows whose key had a first-time match. + batch.selected = fullOuterForwardKeys; + batch.selectedInUse = true; + batch.size = fullOuterForwardKeyCount; + + vectorForwardAuxiliary(batch); + + // Revert the projected columns back, because batch can be re-used by our parent operators. + batch.projectionSize = originalProjectionSize; + batch.projectedColumns = originalProjections; + + batch.selected = originalSelected; + batch.selectedInUse = originalSelectedInUse; + batch.size = originalSize; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java index be05cc2..cc47c1e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java @@ -32,7 +32,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column Long hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap; @@ -65,7 +64,7 @@ protected String getLoggingPrefix() { //--------------------------------------------------------------------------- // The hash map for this specialized class. 
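fullOuterHashTableSetup and forwardFullOuterKeysToInterset above supply the key-intersection plumbing: every probe is recorded in a private MatchTracker, and when the Small Table hash table is shared across Reducers, rows whose key matches for the first time are re-forwarded on an auxiliary edge so a downstream Intersect operator can work out which Small Table keys were never hit anywhere. The specialized Outer operators below do the per-row bookkeeping; the editor's sketch here only condenses it (the exact lookup signature varies by key type):

  // Editor's sketch of the FULL OUTER first-match bookkeeping on the probe side.
  int fullOuterForwardKeyCount = 0;

  JoinUtil.JoinResult joinResult =
      hashMap.lookup(key, hashMapResult, matchTracker);   // every lookup is tracked
  if (isFullOuterForwardKeysToIntersect &&
      joinResult == JoinUtil.JoinResult.MATCH &&
      matchTracker.getIsFirstMatch()) {
    // Remember the batch index of the first row that hit this key.
    fullOuterForwardKeys[fullOuterForwardKeyCount++] = batchIndex;
  }

  // ... after the batch has been processed ...
  if (isFullOuterForwardKeysToIntersect && fullOuterForwardKeyCount > 0) {
    // Temporarily narrows batch.selected and the projection, forwards the first-match rows
    // via vectorForwardAuxiliary, then restores the batch for reuse by the parent operator.
    forwardFullOuterKeysToInterset(batch, fullOuterForwardKeyCount);
  }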
- private transient VectorMapJoinLongHashMap hashMap; + protected transient VectorMapJoinLongHashMap hashMap; //--------------------------------------------------------------------------- // Single-Column Long specific members. @@ -77,7 +76,7 @@ protected String getLoggingPrefix() { private transient long max; // The column number for this one column join specialization. - private transient int singleJoinColumn; + protected transient int singleJoinColumn; //--------------------------------------------------------------------------- // Pass-thru constructors. @@ -102,55 +101,41 @@ public VectorMapJoinOuterLongOperator(CompilationOpContext ctx, OperatorDesc con // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); - - /* - * Initialize Single-Column Long members for this specialized class. - */ - - singleJoinColumn = bigTableKeyColumnMap[0]; + protected void commonSetup() throws HiveException { + super.commonSetup(); - needCommonSetup = false; - } + /* + * Initialize Single-Column Long members for this specialized class. + */ - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + singleJoinColumn = bigTableKeyColumnMap[0]; + } - /* - * Get our Single-Column Long hash map information for this specialized class. - */ + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + /* + * Get our Single-Column Long hash map information for this specialized class. + */ + + hashMap = (VectorMapJoinLongHashMap) vectorMapJoinHashTable; + useMinMax = hashMap.useMinMax(); + if (useMinMax) { + min = hashMap.min(); + max = hashMap.max(); + } - hashMap = (VectorMapJoinLongHashMap) vectorMapJoinHashTable; - useMinMax = hashMap.useMinMax(); - if (useMinMax) { - min = hashMap.min(); - max = hashMap.max(); - } + isFullOuterForwardKeysToIntersect = false; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } - return; - } - // Do the per-batch setup for an outer join. outerPerBatchSetup(batch); @@ -160,9 +145,6 @@ public void process(Object row, int tag) throws HiveException { // later. 
boolean inputSelectedInUse = batch.selectedInUse; if (inputSelectedInUse) { - // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) { - // throw new HiveException("batch.selected is not in sort order and unique"); - // } System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize); } @@ -174,19 +156,6 @@ public void process(Object row, int tag) throws HiveException { ve.evaluate(batch); } someRowsFilteredOut = (batch.size != inputLogicalSize); - if (LOG.isDebugEnabled()) { - if (batch.selectedInUse) { - if (inputSelectedInUse) { - LOG.debug(CLASS_NAME + - " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) + - " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); - } else { - LOG.debug(CLASS_NAME + - " inputLogicalSize " + inputLogicalSize + - " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); - } - } - } } // Perform any key expressions. Results will go into scratch columns. @@ -205,6 +174,11 @@ public void process(Object row, int tag) throws HiveException { long[] vector = joinColVector.vector; /* + * For FULL OUTER only. + */ + int fullOuterForwardKeyCount = 0; + + /* * Single-Column Long check for repeating. */ @@ -234,12 +208,16 @@ public void process(Object row, int tag) throws HiveException { } else { // Handle *repeated* join key, if found. long key = vector[0]; - // LOG.debug(CLASS_NAME + " repeated key " + key); if (useMinMax && (key < min || key > max)) { // Out of range for whole batch. joinResult = JoinUtil.JoinResult.NOMATCH; } else { - joinResult = hashMap.lookup(key, hashMapResults[0]); + joinResult = hashMap.lookup(key, hashMapResults[0], matchTracker); + if (isFullOuterForwardKeysToIntersect && + joinResult == JoinUtil.JoinResult.MATCH && + matchTracker.getIsFirstMatch()) { + fullOuterForwardKeys[fullOuterForwardKeyCount++] = 0; // Index of repeated keys. + } } } @@ -247,9 +225,6 @@ public void process(Object row, int tag) throws HiveException { * Common repeated join result processing. */ - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name()); - } finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut, inputSelectedInUse, inputLogicalSize); } else { @@ -258,10 +233,6 @@ public void process(Object row, int tag) throws HiveException { * NOT Repeating. */ - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated"); - } - int selected[] = batch.selected; boolean selectedInUse = batch.selectedInUse; @@ -286,8 +257,6 @@ public void process(Object row, int tag) throws HiveException { for (int logical = 0; logical < batch.size; logical++) { int batchIndex = (selectedInUse ? selected[logical] : logical); - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch"); - /* * Single-Column Long outer null detection. */ @@ -305,7 +274,6 @@ public void process(Object row, int tag) throws HiveException { atLeastOneNonMatch = true; - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL"); } else { /* @@ -354,23 +322,28 @@ public void process(Object row, int tag) throws HiveException { // Key out of range for whole hash table. 
saveJoinResult = JoinUtil.JoinResult.NOMATCH; } else { - saveJoinResult = hashMap.lookup(currentKey, hashMapResults[hashMapResultCount]); + saveJoinResult = hashMap.lookup(currentKey, hashMapResults[hashMapResultCount], + matchTracker); } - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " New Key " + currentKey + " " + saveJoinResult.name()); - /* * Common outer join result processing. */ switch (saveJoinResult) { case MATCH: + if (isFullOuterForwardKeysToIntersect && + matchTracker.getIsFirstMatch()) { + fullOuterForwardKeys[fullOuterForwardKeyCount++] = batchIndex; + } equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount; equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount; equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow(); equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1; allMatchs[allMatchCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey); break; case SPILL: @@ -381,11 +354,9 @@ public void process(Object row, int tag) throws HiveException { case NOMATCH: atLeastOneNonMatch = true; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey); break; } } else { - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name()); // Series of equal keys. @@ -393,7 +364,6 @@ public void process(Object row, int tag) throws HiveException { case MATCH: equalKeySeriesDuplicateCounts[equalKeySeriesCount]++; allMatchs[allMatchCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate"); break; case SPILL: @@ -403,13 +373,9 @@ public void process(Object row, int tag) throws HiveException { break; case NOMATCH: - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate"); break; } } - // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) { - // throw new HiveException("allMatchs is not in sort order and unique"); - // } } } @@ -451,9 +417,19 @@ public void process(Object row, int tag) throws HiveException { } if (batch.size > 0) { - // Forward any remaining selected rows. + + // Forward any rows in the Big Table batch that had results added (they will be selected). + // NOTE: Other result rows may have been generated in the overflowBatch. forwardBigTableBatch(batch); } + if (isFullOuterForwardKeysToIntersect && + fullOuterForwardKeyCount > 0) { + + // Forward any keys in the Big Table batch that were a first-time match. + // NOTE: We cannot have a LIMIT running below FULL OUTER that cause setDone because it will + // disrupt Intersect processing...
+ forwardFullOuterKeysToInterset(batch, fullOuterForwardKeyCount); + } } catch (IOException e) { throw new HiveException(e); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java index 70f88e3..3050333 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Multi-Key hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; @@ -69,17 +68,17 @@ protected String getLoggingPrefix() { //--------------------------------------------------------------------------- // The hash map for this specialized class. - private transient VectorMapJoinBytesHashMap hashMap; + protected transient VectorMapJoinBytesHashMap hashMap; //--------------------------------------------------------------------------- // Multi-Key specific members. // // Object that can take a set of columns in row in a vectorized row batch and serialized it. - private transient VectorSerializeRow keyVectorSerializeWrite; + protected transient VectorSerializeRow keyVectorSerializeWrite; // The BinarySortable serialization of the current key. - private transient Output currentKeyOutput; + protected transient Output currentKeyOutput; // The BinarySortable serialization of the saved key for a possible series of equal keys. private transient Output saveKeyOutput; @@ -107,55 +106,41 @@ public VectorMapJoinOuterMultiKeyOperator(CompilationOpContext ctx, OperatorDesc // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); - /* - * Initialize Multi-Key members for this specialized class. - */ + /* + * Initialize Multi-Key members for this specialized class. + */ - keyVectorSerializeWrite = new VectorSerializeRow( - new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); + keyVectorSerializeWrite = new VectorSerializeRow( + new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); - currentKeyOutput = new Output(); - saveKeyOutput = new Output(); + currentKeyOutput = new Output(); + saveKeyOutput = new Output(); + } - needCommonSetup = false; - } + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + /* + * Get our Multi-Key hash map information for this specialized class. + */ - /* - * Get our Multi-Key hash map information for this specialized class. 
- */ + hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; - hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + isFullOuterForwardKeysToIntersect = false; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } - return; - } - // Do the per-batch setup for an outer join. outerPerBatchSetup(batch); @@ -165,9 +150,6 @@ public void process(Object row, int tag) throws HiveException { // later. boolean inputSelectedInUse = batch.selectedInUse; if (inputSelectedInUse) { - // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) { - // throw new HiveException("batch.selected is not in sort order and unique"); - // } System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize); } @@ -179,19 +161,6 @@ public void process(Object row, int tag) throws HiveException { ve.evaluate(batch); } someRowsFilteredOut = (batch.size != inputLogicalSize); - if (LOG.isDebugEnabled()) { - if (batch.selectedInUse) { - if (inputSelectedInUse) { - LOG.debug(CLASS_NAME + - " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) + - " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); - } else { - LOG.debug(CLASS_NAME + - " inputLogicalSize " + inputLogicalSize + - " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); - } - } - } } // Perform any key expressions. Results will go into scratch columns. @@ -208,6 +177,11 @@ public void process(Object row, int tag) throws HiveException { // None. /* + * For FULL OUTER only. + */ + int fullOuterForwardKeyCount = 0; + + /* * Multi-Key Long check for repeating. */ @@ -259,16 +233,18 @@ public void process(Object row, int tag) throws HiveException { keyVectorSerializeWrite.serializeWrite(batch, 0); byte[] keyBytes = currentKeyOutput.getData(); int keyLength = currentKeyOutput.getLength(); - joinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[0]); + joinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[0], matchTracker); + if (isFullOuterForwardKeysToIntersect && + joinResult == JoinUtil.JoinResult.MATCH && + matchTracker.getIsFirstMatch()) { + fullOuterForwardKeys[fullOuterForwardKeyCount++] = 0; // Index of repeated keys. + } } /* * Common repeated join result processing. */ - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name()); - } finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut, inputSelectedInUse, inputLogicalSize); } else { @@ -277,10 +253,6 @@ public void process(Object row, int tag) throws HiveException { * NOT Repeating. */ - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated"); - } - int selected[] = batch.selected; boolean selectedInUse = batch.selectedInUse; @@ -305,8 +277,6 @@ public void process(Object row, int tag) throws HiveException { for (int logical = 0; logical < batch.size; logical++) { int batchIndex = (selectedInUse ? selected[logical] : logical); - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch"); - /* * Multi-Key outer null detection. 
*/ @@ -325,7 +295,6 @@ public void process(Object row, int tag) throws HiveException { atLeastOneNonMatch = true; - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL"); } else { /* @@ -375,7 +344,9 @@ public void process(Object row, int tag) throws HiveException { byte[] keyBytes = saveKeyOutput.getData(); int keyLength = saveKeyOutput.getLength(); - saveJoinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[hashMapResultCount]); + saveJoinResult = hashMap.lookup(keyBytes, 0, keyLength, + hashMapResults[hashMapResultCount], matchTracker); + /* * Common outer join result processing. @@ -383,12 +354,15 @@ public void process(Object row, int tag) throws HiveException { switch (saveJoinResult) { case MATCH: + if (isFullOuterForwardKeysToIntersect && + matchTracker.getIsFirstMatch()) { + fullOuterForwardKeys[fullOuterForwardKeyCount++] = batchIndex; + } equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount; equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount; equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow(); equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1; allMatchs[allMatchCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey); break; case SPILL: @@ -399,11 +373,9 @@ public void process(Object row, int tag) throws HiveException { case NOMATCH: atLeastOneNonMatch = true; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey); break; } } else { - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name()); // Series of equal keys. @@ -411,7 +383,6 @@ public void process(Object row, int tag) throws HiveException { case MATCH: equalKeySeriesDuplicateCounts[equalKeySeriesCount]++; allMatchs[allMatchCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate"); break; case SPILL: @@ -421,13 +392,9 @@ public void process(Object row, int tag) throws HiveException { break; case NOMATCH: - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate"); break; } } - // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) { - // throw new HiveException("allMatchs is not in sort order and unique"); - // } } } @@ -469,9 +436,19 @@ public void process(Object row, int tag) throws HiveException { } if (batch.size > 0) { - // Forward any remaining selected rows. + + // Forward any rows in the Big Table batch that had results added (they will be selected). + // NOTE: Other result rows may have been generated in the overflowBatch. forwardBigTableBatch(batch); } + if (isFullOuterForwardKeysToIntersect && + fullOuterForwardKeyCount > 0) { + + // Forward any keys in the Big Table batch that were a first-time match. + // NOTE: We cannot have a LIMIT running below FULL OUTER that cause setDone because it will + // disrupt Intersect processing... 
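/*
 * Editor's aside (not part of the patch): the Multi-Key variant above differs from the
 * Long variant mainly in how the probe key is formed -- all key columns of the row are
 * serialized into one byte array, and that array drives the bytes hash map lookup.
 * A simplified stand-in using plain java.io; Hive actually uses
 * BinarySortableSerializeWrite, not this encoding, and the column choice is hypothetical.
 */
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;

final class MultiKeyBytesSketch {

  // Compose two hypothetical key columns into a single lookup key.
  static byte[] serializeKey(long keyColumn0, String keyColumn1) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(bytes);
    out.writeLong(keyColumn0);
    out.writeUTF(keyColumn1);
    out.flush();
    // The result plays the role of keyBytes in
    // hashMap.lookup(keyBytes, 0, keyBytes.length, hashMapResult, matchTracker).
    return bytes.toByteArray();
  }
}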
+ forwardFullOuterKeysToInterset(batch, fullOuterForwardKeyCount); + } } catch (IOException e) { throw new HiveException(e); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java index 714f5ec..6b41776 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java @@ -26,11 +26,11 @@ import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column String hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; @@ -65,14 +65,14 @@ protected String getLoggingPrefix() { //--------------------------------------------------------------------------- // The hash map for this specialized class. - private transient VectorMapJoinBytesHashMap hashMap; + protected transient VectorMapJoinBytesHashMap hashMap; //--------------------------------------------------------------------------- // Single-Column String specific members. // // The column number for this one column join specialization. - private transient int singleJoinColumn; + protected transient int singleJoinColumn; //--------------------------------------------------------------------------- // Pass-thru constructors. @@ -97,50 +97,36 @@ public VectorMapJoinOuterStringOperator(CompilationOpContext ctx, OperatorDesc c // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; + protected void commonSetup() throws HiveException { + super.commonSetup(); - alias = (byte) tag; + /* + * Initialize Single-Column String members for this specialized class. + */ - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); - - /* - * Initialize Single-Column String members for this specialized class. - */ + singleJoinColumn = bigTableKeyColumnMap[0]; + } - singleJoinColumn = bigTableKeyColumnMap[0]; + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - needCommonSetup = false; - } + /* + * Get our Single-Column String hash map information for this specialized class. + */ - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; - /* - * Get our Single-Column String hash map information for this specialized class. 
- */ - - hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + isFullOuterForwardKeysToIntersect = false; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } - return; - } - // Do the per-batch setup for an outer join. outerPerBatchSetup(batch); @@ -150,33 +136,17 @@ public void process(Object row, int tag) throws HiveException { // later. boolean inputSelectedInUse = batch.selectedInUse; if (inputSelectedInUse) { - // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) { - // throw new HiveException("batch.selected is not in sort order and unique"); - // } System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize); } // Filtering for outer join just removes rows available for hash table matching. - boolean someRowsFilteredOut = false; + boolean someRowsFilteredOut = false; if (bigTableFilterExpressions.length > 0) { // Since the input for (VectorExpression ve : bigTableFilterExpressions) { ve.evaluate(batch); } someRowsFilteredOut = (batch.size != inputLogicalSize); - if (LOG.isDebugEnabled()) { - if (batch.selectedInUse) { - if (inputSelectedInUse) { - LOG.debug(CLASS_NAME + - " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) + - " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); - } else { - LOG.debug(CLASS_NAME + - " inputLogicalSize " + inputLogicalSize + - " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); - } - } - } } // Perform any key expressions. Results will go into scratch columns. @@ -197,6 +167,11 @@ public void process(Object row, int tag) throws HiveException { int[] length = joinColVector.length; /* + * For FULL OUTER only. + */ + int fullOuterForwardKeyCount = 0; + + /* * Single-Column String check for repeating. */ @@ -228,7 +203,13 @@ public void process(Object row, int tag) throws HiveException { byte[] keyBytes = vector[0]; int keyStart = start[0]; int keyLength = length[0]; - joinResult = hashMap.lookup(keyBytes, keyStart, keyLength, hashMapResults[0]); + joinResult = hashMap.lookup( + keyBytes, keyStart, keyLength, hashMapResults[0], matchTracker); + if (isFullOuterForwardKeysToIntersect && + joinResult == JoinUtil.JoinResult.MATCH && + matchTracker.getIsFirstMatch()) { + fullOuterForwardKeys[fullOuterForwardKeyCount++] = 0; // Index of repeated keys. + } } /* @@ -246,10 +227,6 @@ public void process(Object row, int tag) throws HiveException { * NOT Repeating. */ - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated"); - } - int selected[] = batch.selected; boolean selectedInUse = batch.selectedInUse; @@ -274,8 +251,6 @@ public void process(Object row, int tag) throws HiveException { for (int logical = 0; logical < batch.size; logical++) { int batchIndex = (selectedInUse ? selected[logical] : logical); - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch"); - /* * Single-Column String outer null detection. 
*/ @@ -293,7 +268,6 @@ public void process(Object row, int tag) throws HiveException { atLeastOneNonMatch = true; - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL"); } else { /* @@ -343,7 +317,8 @@ public void process(Object row, int tag) throws HiveException { byte[] keyBytes = vector[batchIndex]; int keyStart = start[batchIndex]; int keyLength = length[batchIndex]; - saveJoinResult = hashMap.lookup(keyBytes, keyStart, keyLength, hashMapResults[hashMapResultCount]); + saveJoinResult = hashMap.lookup(keyBytes, keyStart, keyLength, + hashMapResults[hashMapResultCount], matchTracker); /* * Common outer join result processing. @@ -351,12 +326,15 @@ public void process(Object row, int tag) throws HiveException { switch (saveJoinResult) { case MATCH: + if (isFullOuterForwardKeysToIntersect && + matchTracker.getIsFirstMatch()) { + fullOuterForwardKeys[fullOuterForwardKeyCount++] = batchIndex; + } equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount; equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount; equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow(); equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1; allMatchs[allMatchCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey); break; case SPILL: @@ -367,11 +345,9 @@ public void process(Object row, int tag) throws HiveException { case NOMATCH: atLeastOneNonMatch = true; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey); break; } } else { - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name()); // Series of equal keys. @@ -379,7 +355,6 @@ public void process(Object row, int tag) throws HiveException { case MATCH: equalKeySeriesDuplicateCounts[equalKeySeriesCount]++; allMatchs[allMatchCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate"); break; case SPILL: @@ -389,13 +364,9 @@ public void process(Object row, int tag) throws HiveException { break; case NOMATCH: - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate"); break; } } - // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) { - // throw new HiveException("allMatchs is not in sort order and unique"); - // } } } @@ -437,9 +408,19 @@ public void process(Object row, int tag) throws HiveException { } if (batch.size > 0) { - // Forward any remaining selected rows. + + // Forward any rows in the Big Table batch that had results added (they will be selected). + // NOTE: Other result rows may have been generated in the overflowBatch. forwardBigTableBatch(batch); } + if (isFullOuterForwardKeysToIntersect && + fullOuterForwardKeyCount > 0) { + + // Forward any keys in the Big Table batch that were a first-time match. + // NOTE: We cannot have a LIMIT running below FULL OUTER that cause setDone because it will + // disrupt Intersect processing... 
+ forwardFullOuterKeysToInterset(batch, fullOuterForwardKeyCount); + } } catch (IOException e) { throw new HiveException(e); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java index 57db136..2211acf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java @@ -23,9 +23,13 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; import org.apache.hadoop.io.BytesWritable; import org.apache.hive.common.util.HashCodeUtil; @@ -46,12 +50,115 @@ protected BytesWritable testValueBytesWritable; + private long fullOuterNullKeyValueRef; + + private static class NonMatchedBytesHashMapIterator extends VectorMapJoinFastNonMatchedIterator { + + private VectorMapJoinFastBytesHashMap hashMap; + + private boolean noMore; + private boolean keyIsNull; + + private WriteBuffers.Position nonMatchedReadPos; + + private ByteSegmentRef nonMatchedKeyByteSegmentRef; + + private VectorMapJoinFastValueStore.HashMapResult nonMatchedHashMapResult; + + public NonMatchedBytesHashMapIterator(MatchTracker matchTracker, + VectorMapJoinFastBytesHashMap hashMap) { + super(matchTracker); + this.hashMap = hashMap; + } + + @Override + public void init() { + super.init(); + noMore = false; + keyIsNull = false; + nonMatchedReadPos = new WriteBuffers.Position(); + nonMatchedKeyByteSegmentRef = new ByteSegmentRef(); + nonMatchedHashMapResult = new VectorMapJoinFastValueStore.HashMapResult(); + } + + @Override + public boolean findNextNonMatched() { + if (noMore) { + return false; + } + while (true) { + nonMatchedLogicalSlotNum++; + if (nonMatchedLogicalSlotNum >= hashMap.logicalHashBucketCount) { + + // Fall below and handle Small Table NULL key. + break; + } + final int nonMatchedTripleIndex = nonMatchedLogicalSlotNum * 3; + if (hashMap.slotTriples[nonMatchedTripleIndex] != 0) { + if (!matchTracker.wasMatched(nonMatchedLogicalSlotNum)) { + nonMatchedHashMapResult.set( + hashMap.valueStore, hashMap.slotTriples[nonMatchedTripleIndex + 2]); + keyIsNull = false; + return true; + } + } + } + + // Do we have a Small Table NULL Key? 
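/*
 * Editor's aside (not part of the patch): what findNextNonMatched() does, reduced to
 * plain arrays. It scans every logical slot, skips the ones the match tracker saw
 * during the Big Table probes, and finally emits the Small Table rows whose join key
 * was NULL -- those can never match but must still appear in FULL OUTER output.
 * All names are illustrative.
 */
final class NonMatchedScanSketch {

  static void emitNonMatched(long[] slotKeyRefs, boolean[] matched, boolean hasNullKeyValues) {
    for (int slot = 0; slot < slotKeyRefs.length; slot++) {
      // A zero key reference means the slot was never populated.
      if (slotKeyRefs[slot] != 0 && !matched[slot]) {
        System.out.println("emit Small-Table-only result rows for slot " + slot);
      }
    }
    if (hasNullKeyValues) {
      // Handled last, like the fullOuterNullKeyValueRef check in the iterator.
      System.out.println("emit Small Table rows whose join key was NULL");
    }
  }
}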
+ if (hashMap.fullOuterNullKeyValueRef == 0) { + return false; + } + nonMatchedHashMapResult.set( + hashMap.valueStore, hashMap.fullOuterNullKeyValueRef); + noMore = true; + keyIsNull = true; + return true; + } + + @Override + public boolean readNonMatchedBytesKey() throws HiveException { + if (keyIsNull) { + return false; + } + hashMap.keyStore.getKey( + hashMap.slotTriples[nonMatchedLogicalSlotNum * 3], + nonMatchedKeyByteSegmentRef, + nonMatchedReadPos); + return true; + } + + @Override + public byte[] getNonMatchedBytes() { + return nonMatchedKeyByteSegmentRef.getBytes(); + } + + @Override + public int getNonMatchedBytesOffset() { + return (int) nonMatchedKeyByteSegmentRef.getOffset(); + } + + @Override + public int getNonMatchedBytesLength() { + return nonMatchedKeyByteSegmentRef.getLength(); + } + + @Override + public VectorMapJoinHashMapResult getNonMatchedHashMapResult() { + return nonMatchedHashMapResult; + } + } + @Override public VectorMapJoinHashMapResult createHashMapResult() { return new VectorMapJoinFastValueStore.HashMapResult(); } @Override + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + return new NonMatchedBytesHashMapIterator(matchTracker, this); + } + + @Override public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, long hashCode, boolean isNewKey, BytesWritable currentValue) { @@ -64,31 +171,56 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, slotTriples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength); slotTriples[tripleIndex + 1] = hashCode; slotTriples[tripleIndex + 2] = valueStore.addFirst(valueBytes, 0, valueLength); - // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); } else { // Add another value. 
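/*
 * Editor's aside (not part of the patch): the bytes tables keep each logical slot as
 * three adjacent longs -- key reference, hash code, value-chain reference. Returning
 * the triple index from the probe (instead of the raw value reference) lets callers
 * read the value chain and also derive the logical slot, tripleIndex / 3, for the
 * match tracker. A compact, self-contained illustration with assumed names:
 */
final class SlotTripleSketch {

  static void useProbeResult(long[] slotTriples, int tripleIndex, boolean[] matched) {
    if (tripleIndex == -1) {
      return;                                     // NOMATCH: empty slot or probe gave up
    }
    long keyRef = slotTriples[tripleIndex];       // where the key bytes live
    long hashCode = slotTriples[tripleIndex + 1]; // cached for cheap collision rejection
    long valueRef = slotTriples[tripleIndex + 2]; // head of the chained values for this key
    int logicalSlot = tripleIndex / 3;            // what the match tracker records
    matched[logicalSlot] = true;
    System.out.println("slot " + logicalSlot + " keyRef " + keyRef
        + " hashCode " + hashCode + " valueRef " + valueRef);
  }
}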
- // LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); slotTriples[tripleIndex + 2] = valueStore.addMore(slotTriples[tripleIndex + 2], valueBytes, 0, valueLength); - // LOG.debug("VectorMapJoinFastBytesHashMap add more new valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); } } @Override - public JoinUtil.JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, VectorMapJoinHashMapResult hashMapResult) { + public JoinUtil.JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, + VectorMapJoinHashMapResult hashMapResult) { VectorMapJoinFastValueStore.HashMapResult optimizedHashMapResult = (VectorMapJoinFastValueStore.HashMapResult) hashMapResult; optimizedHashMapResult.forget(); long hashCode = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength); - long valueRefWord = findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashMapResult.getReadPos()); + int tripleIndex = + findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashMapResult.getReadPos()); JoinUtil.JoinResult joinResult; - if (valueRefWord == -1) { + if (tripleIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { - // LOG.debug("VectorMapJoinFastBytesHashMap lookup hashCode " + Long.toHexString(hashCode) + " valueRefWord " + Long.toHexString(valueRefWord) + " (valueStore != null) " + (valueStore != null)); + optimizedHashMapResult.set(valueStore, slotTriples[tripleIndex + 2]); - optimizedHashMapResult.set(valueStore, valueRefWord); + joinResult = JoinUtil.JoinResult.MATCH; + } + + optimizedHashMapResult.setJoinResult(joinResult); + + return joinResult; + } + + @Override + public JoinUtil.JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, + VectorMapJoinHashMapResult hashMapResult, MatchTracker matchTracker) { + VectorMapJoinFastValueStore.HashMapResult optimizedHashMapResult = + (VectorMapJoinFastValueStore.HashMapResult) hashMapResult; + + optimizedHashMapResult.forget(); + + long hashCode = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength); + int tripleIndex = + findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashMapResult.getReadPos()); + JoinUtil.JoinResult joinResult; + if (tripleIndex == -1) { + joinResult = JoinUtil.JoinResult.NOMATCH; + } else { + if (matchTracker != null) { + matchTracker.trackMatch(tripleIndex / 3); + } + optimizedHashMapResult.set(valueStore, slotTriples[tripleIndex + 2]); joinResult = JoinUtil.JoinResult.MATCH; } @@ -98,10 +230,40 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, return joinResult; } + @Override + public void lookupNoResult(byte[] keyBytes, int keyStart, int keyLength, + WriteBuffers.Position readPos, MatchTracker matchTracker) { + + long hashCode = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength); + int tripleIndex = + findReadSlot(keyBytes, keyStart, keyLength, hashCode, readPos); + JoinUtil.JoinResult joinResult; + if (tripleIndex != -1) { + matchTracker.trackMatch(tripleIndex / 3); + } + } + + public void addFullOuterNullKeyValue(BytesWritable currentValue) { + + byte[] valueBytes = currentValue.getBytes(); + int valueLength = currentValue.getLength(); + + if (fullOuterNullKeyValueRef == 0) { + fullOuterNullKeyValueRef = valueStore.addFirst(valueBytes, 0, valueLength); + } else { + + // Add another value. 
+ fullOuterNullKeyValueRef = + valueStore.addMore(fullOuterNullKeyValueRef, valueBytes, 0, valueLength); + } + } + public VectorMapJoinFastBytesHashMap( int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); + fullOuterNullKeyValueRef = 0; + valueStore = new VectorMapJoinFastValueStore(writeBuffersSize); // Share the same write buffers with our value store. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java index 726fd29..c0295dd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java @@ -57,10 +57,8 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, slotTriples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength); slotTriples[tripleIndex + 1] = hashCode; slotTriples[tripleIndex + 2] = 1; // Count. - // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); } else { // Add another value. - // LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); slotTriples[tripleIndex + 2]++; } } @@ -75,13 +73,20 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, optimizedHashMultiSetResult.forget(); long hashCode = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength); - long count = findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashMultiSetResult.getReadPos()); + int tripleIndex = + findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashMultiSetResult.getReadPos()); JoinUtil.JoinResult joinResult; - if (count == -1) { + if (tripleIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { - - optimizedHashMultiSetResult.set(count); + /* + * NOTE: Support for trackMatched not needed yet for Set. 
+ + if (matchTracker != null) { + matchTracker.trackMatch(tripleIndex / 3); + } + */ + optimizedHashMultiSetResult.set(slotTriples[tripleIndex + 2]); joinResult = JoinUtil.JoinResult.MATCH; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java index 5d750a8..e99a029 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java @@ -65,11 +65,19 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, optimizedHashSetResult.forget(); long hashCode = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength); - long existance = findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashSetResult.getReadPos()); + int tripleIndex = + findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashSetResult.getReadPos()); JoinUtil.JoinResult joinResult; - if (existance == -1) { + if (tripleIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { + /* + * NOTE: Support for trackMatched not needed yet for Set. + + if (matchTracker != null) { + matchTracker.trackMatch(tripleIndex / 3); + } + */ joinResult = JoinUtil.JoinResult.MATCH; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java index f2b794f..dcb89b6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java @@ -70,13 +70,11 @@ public void add(byte[] keyBytes, int keyStart, int keyLength, BytesWritable curr while (true) { int tripleIndex = 3 * slot; if (slotTriples[tripleIndex] == 0) { - // LOG.debug("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " empty"); isNewKey = true;; break; } if (hashCode == slotTriples[tripleIndex + 1] && keyStore.unsafeEqualKey(slotTriples[tripleIndex], keyBytes, keyStart, keyLength)) { - // LOG.debug("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " existing"); isNewKey = false; break; } @@ -150,7 +148,6 @@ private void expandAndRehash() { } // Use old value reference word. 
- // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash key " + tableKey + " slot " + newSlot + " newPairIndex " + newPairIndex + " empty slot (i = " + i + ")"); newSlotTriples[newTripleIndex] = keyRef; newSlotTriples[newTripleIndex + 1] = hashCode; @@ -165,10 +162,9 @@ private void expandAndRehash() { largestNumberOfSteps = newLargestNumberOfSteps; resizeThreshold = (int)(logicalHashBucketCount * loadFactor); metricExpands++; - // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands); } - protected final long findReadSlot( + protected final int findReadSlot( byte[] keyBytes, int keyStart, int keyLength, long hashCode, WriteBuffers.Position readPos) { int intHashCode = (int) hashCode; @@ -177,7 +173,6 @@ protected final long findReadSlot( int i = 0; while (true) { int tripleIndex = slot * 3; - // LOG.debug("VectorMapJoinFastBytesHashMap findReadSlot slot keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(hashCode) + " entry hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); if (slotTriples[tripleIndex] == 0) { // Given that we do not delete, an empty slot means no match. return -1; @@ -185,7 +180,7 @@ protected final long findReadSlot( // Finally, verify the key bytes match. if (keyStore.equalKey(slotTriples[tripleIndex], keyBytes, keyStart, keyLength, readPos)) { - return slotTriples[tripleIndex + 2]; + return tripleIndex; } } // Some other key (collision) - keep probing. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java index cbcc9b1..a868b5f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java @@ -22,7 +22,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTable; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; public abstract class VectorMapJoinFastHashTable implements VectorMapJoinHashTable { public static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastHashTable.class); @@ -96,4 +98,14 @@ public long getEstimatedMemorySize() { JavaDataModel jdm = JavaDataModel.get(); return JavaDataModel.alignUp(10L * jdm.primitive1() + jdm.primitive2(), jdm.memoryAlign()); } + + @Override + public MatchTracker createMatchTracker() { + return new MatchTracker(logicalHashBucketCount); + } + + @Override + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + throw new RuntimeException("Not implemented"); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java index b6684e0..0a3c84a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java @@ -22,6 +22,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; // Optimized for sequential key lookup. @@ -124,13 +125,11 @@ public boolean unsafeEqualKey(long keyRefWord, byte[] keyBytes, int keyStart, in public boolean equalKey(long keyRefWord, byte[] keyBytes, int keyStart, int keyLength, WriteBuffers.Position readPos) { - int storedKeyLengthLength = + int storedKeyLength = (int) ((keyRefWord & SmallKeyLength.bitMask) >> SmallKeyLength.bitShift); - boolean isKeyLengthSmall = (storedKeyLengthLength != SmallKeyLength.allBitsOn); + boolean isKeyLengthSmall = (storedKeyLength != SmallKeyLength.allBitsOn); - // LOG.debug("VectorMapJoinFastKeyStore equalKey keyLength " + keyLength + " isKeyLengthSmall " + isKeyLengthSmall + " storedKeyLengthLength " + storedKeyLengthLength + " keyRefWord " + Long.toHexString(keyRefWord)); - - if (isKeyLengthSmall && storedKeyLengthLength != keyLength) { + if (isKeyLengthSmall && storedKeyLength != keyLength) { return false; } long absoluteKeyOffset = @@ -139,16 +138,14 @@ public boolean equalKey(long keyRefWord, byte[] keyBytes, int keyStart, int keyL writeBuffers.setReadPoint(absoluteKeyOffset, readPos); if (!isKeyLengthSmall) { // Read big value length we wrote with the value. - storedKeyLengthLength = writeBuffers.readVInt(readPos); - if (storedKeyLengthLength != keyLength) { - // LOG.debug("VectorMapJoinFastKeyStore equalKey no match big length"); + storedKeyLength = writeBuffers.readVInt(readPos); + if (storedKeyLength != keyLength) { return false; } } // Our reading is positioned to the key. if (!writeBuffers.isEqual(keyBytes, keyStart, readPos, keyLength)) { - // LOG.debug("VectorMapJoinFastKeyStore equalKey no match on bytes"); return false; } @@ -174,4 +171,23 @@ public long getEstimatedMemorySize() { size += unsafeReadPos == null ? 0 : unsafeReadPos.getEstimatedMemorySize(); return size; } + + public void getKey(long keyRefWord, ByteSegmentRef keyByteSegmentRef, + WriteBuffers.Position readPos) { + + int storedKeyLength = + (int) ((keyRefWord & SmallKeyLength.bitMask) >> SmallKeyLength.bitShift); + boolean isKeyLengthSmall = (storedKeyLength != SmallKeyLength.allBitsOn); + + long absoluteKeyOffset = + (keyRefWord & AbsoluteKeyOffset.bitMask); + + writeBuffers.setReadPoint(absoluteKeyOffset, readPos); + if (!isKeyLengthSmall) { + // Read big value length we wrote with the value. 
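/*
 * Editor's aside (not part of the patch): getKey() reverses what the key store wrote.
 * A key reference word packs the key's absolute offset in the write buffers together
 * with a small length field; lengths too large for that field are flagged with an
 * all-bits-on sentinel and the real length is stored as a varint in front of the key
 * bytes. The bit widths below are assumptions for illustration only, not the actual
 * SmallKeyLength/AbsoluteKeyOffset layout.
 */
final class KeyRefWordSketch {

  static final int LENGTH_BITS = 10;                       // assumed width
  static final long LENGTH_MASK = (1L << LENGTH_BITS) - 1;
  static final long ALL_BITS_ON = LENGTH_MASK;             // "length stored with the key bytes"

  static long pack(long absoluteKeyOffset, int keyLength) {
    long lengthField = keyLength < ALL_BITS_ON ? keyLength : ALL_BITS_ON;
    return (absoluteKeyOffset << LENGTH_BITS) | lengthField;
  }

  static void describe(long keyRefWord) {
    long lengthField = keyRefWord & LENGTH_MASK;
    long absoluteKeyOffset = keyRefWord >>> LENGTH_BITS;
    if (lengthField == ALL_BITS_ON) {
      System.out.println("key at offset " + absoluteKeyOffset
          + ", length read from a varint prefix next to the key bytes");
    } else {
      System.out.println("key at offset " + absoluteKeyOffset + ", length " + lengthField);
    }
  }
}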
+ storedKeyLength = writeBuffers.readVInt(readPos); + } + writeBuffers.getByteSegmentRefToCurrent(keyByteSegmentRef, storedKeyLength, readPos); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java index f42430d..8b09aa6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java @@ -22,13 +22,18 @@ import org.apache.hadoop.hive.common.MemoryEstimate; import org.apache.hadoop.hive.ql.util.JavaDataModel; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +// import org.slf4j.Logger; +// import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; import org.apache.hadoop.io.BytesWritable; import org.apache.hive.common.util.HashCodeUtil; @@ -41,17 +46,122 @@ extends VectorMapJoinFastLongHashTable implements VectorMapJoinLongHashMap, MemoryEstimate { - public static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastLongHashMap.class); + // public static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastLongHashMap.class); + + private final boolean isSaveNullKeyValuesForFullOuter; protected VectorMapJoinFastValueStore valueStore; private BytesWritable testValueBytesWritable; + private long fullOuterNullKeyValueRef; + + private static class NonMatchedLongHashMapIterator extends VectorMapJoinFastNonMatchedIterator { + + private VectorMapJoinFastLongHashMap hashMap; + + private boolean noMore; + private boolean keyIsNull; + + private WriteBuffers.Position nonMatchedReadPos; + + private ByteSegmentRef nonMatchedKeyByteSegmentRef; + + private VectorMapJoinFastValueStore.HashMapResult nonMatchedHashMapResult; + + public NonMatchedLongHashMapIterator(MatchTracker matchTracker, + VectorMapJoinFastLongHashMap hashMap) { + super(matchTracker); + this.hashMap = hashMap; + } + + @Override + public void init() { + super.init(); + noMore = false; + keyIsNull = false; + nonMatchedHashMapResult = new VectorMapJoinFastValueStore.HashMapResult(); + } + + @Override + public boolean findNextNonMatched() { + if (noMore) { + return false; + } + while (true) { + nonMatchedLogicalSlotNum++; + if (nonMatchedLogicalSlotNum >= hashMap.logicalHashBucketCount){ + + // Fall below and handle Small Table NULL key. + break; + } + final int nonMatchedDoubleIndex = nonMatchedLogicalSlotNum * 2; + if (hashMap.slotPairs[nonMatchedDoubleIndex] != 0) { + if (!matchTracker.wasMatched(nonMatchedLogicalSlotNum)) { + nonMatchedHashMapResult.set( + hashMap.valueStore, hashMap.slotPairs[nonMatchedDoubleIndex]); + keyIsNull = false; + return true; + } + } + } + + // Do we have a Small Table NULL Key? 
+ if (hashMap.fullOuterNullKeyValueRef == 0) { + return false; + } + nonMatchedHashMapResult.set( + hashMap.valueStore, hashMap.fullOuterNullKeyValueRef); + noMore = true; + keyIsNull = true; + return true; + } + + @Override + public boolean readNonMatchedLongKey() { + return !keyIsNull; + } + + @Override + public long getNonMatchedLongKey() { + return hashMap.slotPairs[nonMatchedLogicalSlotNum * 2 + 1]; + } + + @Override + public VectorMapJoinHashMapResult getNonMatchedHashMapResult() { + return nonMatchedHashMapResult; + } + } + + @Override public VectorMapJoinHashMapResult createHashMapResult() { return new VectorMapJoinFastValueStore.HashMapResult(); } + @Override + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + return new NonMatchedLongHashMapIterator(matchTracker, this); + } + + @Override + public void putRow(BytesWritable currentKey, BytesWritable currentValue) + throws HiveException, IOException { + + if (!adaptPutRow(currentKey, currentValue)) { + + // Ignore NULL keys, except for FULL OUTER. + if (isSaveNullKeyValuesForFullOuter) { + addFullOuterNullKeyValue(currentValue); + } + + } + } + /* * A Unit Test convenience method for putting key and value into the hash table using the * actual types. @@ -91,13 +201,12 @@ public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable curre optimizedHashMapResult.forget(); long hashCode = HashCodeUtil.calculateLongHashCode(key); - // LOG.debug("VectorMapJoinFastLongHashMap lookup " + key + " hashCode " + hashCode); - long valueRef = findReadSlot(key, hashCode); + int pairIndex = findReadSlot(key, hashCode); JoinUtil.JoinResult joinResult; - if (valueRef == -1) { + if (pairIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { - optimizedHashMapResult.set(valueStore, valueRef); + optimizedHashMapResult.set(valueStore, slotPairs[pairIndex]); joinResult = JoinUtil.JoinResult.MATCH; } @@ -107,12 +216,70 @@ public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable curre return joinResult; } + @Override + public JoinUtil.JoinResult lookup(long key, VectorMapJoinHashMapResult hashMapResult, + MatchTracker matchTracker) { + + VectorMapJoinFastValueStore.HashMapResult optimizedHashMapResult = + (VectorMapJoinFastValueStore.HashMapResult) hashMapResult; + + optimizedHashMapResult.forget(); + + long hashCode = HashCodeUtil.calculateLongHashCode(key); + int pairIndex = findReadSlot(key, hashCode); + JoinUtil.JoinResult joinResult; + if (pairIndex == -1) { + joinResult = JoinUtil.JoinResult.NOMATCH; + } else { + if (matchTracker != null) { + matchTracker.trackMatch(pairIndex / 2); + } + optimizedHashMapResult.set(valueStore, slotPairs[pairIndex]); + + joinResult = JoinUtil.JoinResult.MATCH; + } + + optimizedHashMapResult.setJoinResult(joinResult); + + return joinResult; + } + + @Override + public void lookupNoResult(long key, WriteBuffers.Position readPos, MatchTracker matchTracker) { + + long hashCode = HashCodeUtil.calculateLongHashCode(key); + int pairIndex = findReadSlot(key, hashCode); + JoinUtil.JoinResult joinResult; + if (pairIndex != -1) { + matchTracker.trackMatch(pairIndex / 2); + } + } + + public void addFullOuterNullKeyValue(BytesWritable currentValue) { + + byte[] valueBytes = currentValue.getBytes(); + int valueLength = currentValue.getLength(); + + if (fullOuterNullKeyValueRef == 0) { + fullOuterNullKeyValueRef = valueStore.addFirst(valueBytes, 0, 
valueLength); + } else { + + // Add another value. + fullOuterNullKeyValueRef = + valueStore.addMore(fullOuterNullKeyValueRef, valueBytes, 0, valueLength); + } + } + public VectorMapJoinFastLongHashMap( - boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, + boolean minMaxEnabled, + boolean isSaveNullKeyValuesForFullOuter, + HashTableKeyType hashTableKeyType, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { - super(minMaxEnabled, isOuterJoin, hashTableKeyType, + super(minMaxEnabled, hashTableKeyType, initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); + this.isSaveNullKeyValuesForFullOuter = isSaveNullKeyValuesForFullOuter; valueStore = new VectorMapJoinFastValueStore(writeBuffersSize); + fullOuterNullKeyValueRef = 0; } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java index 228fa72..eda8a56 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java @@ -42,11 +42,29 @@ public static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastLongHashMultiSet.class); + private final boolean isSaveNullKeyValuesForFullOuter; + + private long fullOuterNullKeyValueCount; + @Override public VectorMapJoinHashMultiSetResult createHashMultiSetResult() { return new VectorMapJoinFastHashMultiSet.HashMultiSetResult(); } + @Override + public void putRow(BytesWritable currentKey, BytesWritable currentValue) + throws HiveException, IOException { + + if (!adaptPutRow(currentKey, currentValue)) { + + // Ignore NULL keys, except for FULL OUTER. + if (isSaveNullKeyValuesForFullOuter) { + fullOuterNullKeyValueCount++; + } + + } + } + /* * A Unit Test convenience method for putting the key into the hash table using the * actual type. @@ -80,12 +98,19 @@ public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable curre optimizedHashMultiSetResult.forget(); long hashCode = HashCodeUtil.calculateLongHashCode(key); - long count = findReadSlot(key, hashCode); + int pairIndex = findReadSlot(key, hashCode); JoinUtil.JoinResult joinResult; - if (count == -1) { + if (pairIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { - optimizedHashMultiSetResult.set(count); + /* + * NOTE: Support for trackMatched not needed yet for Set. 
+ + if (matchTracker != null) { + matchTracker.trackMatch(pairIndex / 2); + } + */ + optimizedHashMultiSetResult.set(slotPairs[pairIndex]); joinResult = JoinUtil.JoinResult.MATCH; } @@ -95,10 +120,14 @@ public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable curre } public VectorMapJoinFastLongHashMultiSet( - boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, + boolean minMaxEnabled, + boolean isSaveNullKeyValuesForFullOuter, + HashTableKeyType hashTableKeyType, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { - super(minMaxEnabled, isOuterJoin, hashTableKeyType, + super(minMaxEnabled, hashTableKeyType, initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); + this.isSaveNullKeyValuesForFullOuter = isSaveNullKeyValuesForFullOuter; + fullOuterNullKeyValueCount = 0; } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java index 4c049cb..14b1965 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java @@ -47,6 +47,14 @@ public VectorMapJoinHashSetResult createHashSetResult() { return new VectorMapJoinFastHashSet.HashSetResult(); } + @Override + public void putRow(BytesWritable currentKey, BytesWritable currentValue) + throws HiveException, IOException { + + // Ignore NULL keys (HashSet not used for FULL OUTER). + adaptPutRow(currentKey, currentValue); + } + /* * A Unit Test convenience method for putting the key into the hash table using the * actual type. @@ -76,11 +84,18 @@ public JoinResult contains(long key, VectorMapJoinHashSetResult hashSetResult) { optimizedHashSetResult.forget(); long hashCode = HashCodeUtil.calculateLongHashCode(key); - long existance = findReadSlot(key, hashCode); + int pairIndex = findReadSlot(key, hashCode); JoinUtil.JoinResult joinResult; - if (existance == -1) { + if (pairIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { + /* + * NOTE: Support for trackMatched not needed yet for Set. 
+ + if (matchTracker != null) { + matchTracker.trackMatch(pairIndex / 2); + } + */ joinResult = JoinUtil.JoinResult.MATCH; } @@ -91,9 +106,10 @@ public JoinResult contains(long key, VectorMapJoinHashSetResult hashSetResult) { } public VectorMapJoinFastLongHashSet( - boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, + boolean minMaxEnabled, + HashTableKeyType hashTableKeyType, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { - super(minMaxEnabled, isOuterJoin, hashTableKeyType, + super(minMaxEnabled, hashTableKeyType, initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java index c9c3e80..8b775fe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java @@ -49,8 +49,6 @@ private final HashTableKeyType hashTableKeyType; - private final boolean isOuterJoin; - private final BinarySortableDeserializeRead keyBinarySortableDeserializeRead; private final boolean useMinMax; @@ -72,14 +70,13 @@ public long max() { return max; } - @Override - public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { + public boolean adaptPutRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { byte[] keyBytes = currentKey.getBytes(); int keyLength = currentKey.getLength(); keyBinarySortableDeserializeRead.set(keyBytes, 0, keyLength); try { if (!keyBinarySortableDeserializeRead.readNextField()) { - return; + return false; } } catch (Exception e) { throw new HiveException( @@ -92,6 +89,7 @@ public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws keyBinarySortableDeserializeRead, hashTableKeyType); add(key, currentValue); + return true; } protected abstract void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue); @@ -215,10 +213,9 @@ private void expandAndRehash() { largestNumberOfSteps = newLargestNumberOfSteps; resizeThreshold = (int)(logicalHashBucketCount * loadFactor); metricExpands++; - // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands); } - protected long findReadSlot(long key, long hashCode) { + protected int findReadSlot(long key, long hashCode) { int intHashCode = (int) hashCode; int slot = intHashCode & logicalHashBucketMask; @@ -230,20 +227,16 @@ protected long findReadSlot(long key, long hashCode) { long valueRef = slotPairs[pairIndex]; if (valueRef == 0) { // Given that we do not delete, an empty slot means no match. - // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " empty slot (i = " + i + ")"); return -1; } long tableKey = slotPairs[pairIndex + 1]; if (key == tableKey) { - // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " found key (i = " + i + ")"); - return slotPairs[pairIndex]; + return pairIndex; } // Some other key (collision) - keep probing. 
probeSlot += (++i); if (i > largestNumberOfSteps) { - // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot returning not found"); // We know we never went that far when we were inserting. - // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " largestNumberOfSteps " + largestNumberOfSteps + " (i = " + i + ")"); return -1; } slot = (int)(probeSlot & logicalHashBucketMask); @@ -268,10 +261,10 @@ private void allocateBucketArray() { } public VectorMapJoinFastLongHashTable( - boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, + boolean minMaxEnabled, + HashTableKeyType hashTableKeyType, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); - this.isOuterJoin = isOuterJoin; this.hashTableKeyType = hashTableKeyType; PrimitiveTypeInfo[] primitiveTypeInfos = { hashTableKeyType.getPrimitiveTypeInfo() }; keyBinarySortableDeserializeRead = diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java index 2798010..4a63772 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java @@ -49,8 +49,8 @@ public void testPutRow(byte[] currentKey, byte[] currentValue) throws HiveExcept } public VectorMapJoinFastMultiKeyHashMap( - boolean isOuterJoin, - int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { + boolean isSaveNullKeyValuesForFullOuter, + int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java index 0560281..31aa95f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java @@ -47,8 +47,8 @@ public void testPutRow(byte[] currentKey) throws HiveException, IOException { } public VectorMapJoinFastMultiKeyHashMultiSet( - boolean isOuterJoin, - int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { + boolean isSaveNullKeyValuesForFullOuter, + int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java index 900ca55..ed8b989 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java @@ -47,8 +47,7 @@ public void testPutRow(byte[] currentKey) throws HiveException, IOException { } public VectorMapJoinFastMultiKeyHashSet( - boolean isOuterJoin, - int initialCapacity, float loadFactor, 
int writeBuffersSize, long estimatedKeyCount) { + int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastNonMatchedIterator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastNonMatchedIterator.java new file mode 100644 index 0000000..ac87971 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastNonMatchedIterator.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; + +/* + * The abstract class for vectorized non-match Small Table key iteration. + */ +public abstract class VectorMapJoinFastNonMatchedIterator + extends VectorMapJoinNonMatchedIterator { + + protected int nonMatchedLogicalSlotNum; + + public VectorMapJoinFastNonMatchedIterator(MatchTracker matchTracker) { + super(matchTracker); + } + + @Override + public void init() { + nonMatchedLogicalSlotNum = -1; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java index 777eb45..94dde50 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java @@ -35,11 +35,9 @@ public static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastStringCommon.class); - private boolean isOuterJoin; - private BinarySortableDeserializeRead keyBinarySortableDeserializeRead; - public void adaptPutRow(VectorMapJoinFastBytesHashTable hashTable, + public boolean adaptPutRow(VectorMapJoinFastBytesHashTable hashTable, BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { byte[] keyBytes = currentKey.getBytes(); @@ -47,7 +45,7 @@ public void adaptPutRow(VectorMapJoinFastBytesHashTable hashTable, keyBinarySortableDeserializeRead.set(keyBytes, 0, keyLength); try { if (!keyBinarySortableDeserializeRead.readNextField()) { - return; + return false; } } catch (Exception e) { throw new HiveException( @@ -61,10 +59,10 @@ public void adaptPutRow(VectorMapJoinFastBytesHashTable hashTable, keyBinarySortableDeserializeRead.currentBytesStart, keyBinarySortableDeserializeRead.currentBytesLength, currentValue); + return true; } - public 
VectorMapJoinFastStringCommon(boolean isOuterJoin) { - this.isOuterJoin = isOuterJoin; + public VectorMapJoinFastStringCommon() { PrimitiveTypeInfo[] primitiveTypeInfos = { TypeInfoFactory.stringTypeInfo }; keyBinarySortableDeserializeRead = new BinarySortableDeserializeRead( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java index fc4edda..56068f3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java @@ -30,18 +30,27 @@ */ public class VectorMapJoinFastStringHashMap extends VectorMapJoinFastBytesHashMap { + private final boolean isSaveNullKeyValuesForFullOuter; + private VectorMapJoinFastStringCommon stringCommon; @Override public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { - stringCommon.adaptPutRow(this, currentKey, currentValue); + if (!stringCommon.adaptPutRow(this, currentKey, currentValue)) { + + // Ignore NULL keys, except for FULL OUTER. + if (isSaveNullKeyValuesForFullOuter) { + addFullOuterNullKeyValue(currentValue); + } + } } public VectorMapJoinFastStringHashMap( - boolean isOuterJoin, + boolean isSaveNullKeyValuesForFullOuter, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); - stringCommon = new VectorMapJoinFastStringCommon(isOuterJoin); + this.isSaveNullKeyValuesForFullOuter = isSaveNullKeyValuesForFullOuter; + stringCommon = new VectorMapJoinFastStringCommon(); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java index 3dbdfa7..911a61e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java @@ -30,18 +30,30 @@ */ public class VectorMapJoinFastStringHashMultiSet extends VectorMapJoinFastBytesHashMultiSet { - private VectorMapJoinFastStringCommon stringCommon; + private final boolean isSaveNullKeyValuesForFullOuter; + + private final VectorMapJoinFastStringCommon stringCommon; + + private long fullOuterNullKeyValueCount; @Override public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { - stringCommon.adaptPutRow(this, currentKey, currentValue); + if (!stringCommon.adaptPutRow(this, currentKey, currentValue)) { + + // Ignore NULL keys, except for FULL OUTER. 
+ if (isSaveNullKeyValuesForFullOuter) { + fullOuterNullKeyValueCount++; + } + } } public VectorMapJoinFastStringHashMultiSet( - boolean isOuterJoin, + boolean isSaveNullKeyValuesForFullOuter, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); - stringCommon = new VectorMapJoinFastStringCommon(isOuterJoin); + this.isSaveNullKeyValuesForFullOuter = isSaveNullKeyValuesForFullOuter; + fullOuterNullKeyValueCount = 0; + stringCommon = new VectorMapJoinFastStringCommon(); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java index 84f8439..3dc7847 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java @@ -33,15 +33,17 @@ private VectorMapJoinFastStringCommon stringCommon; @Override - public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { + public void putRow(BytesWritable currentKey, BytesWritable currentValue) + throws HiveException, IOException { + + // Ignore NULL keys (HashSet not used for FULL OUTER). stringCommon.adaptPutRow(this, currentKey, currentValue); } public VectorMapJoinFastStringHashSet( - boolean isOuterJoin, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); - stringCommon = new VectorMapJoinFastStringCommon(isOuterJoin); + stringCommon = new VectorMapJoinFastStringCommon(); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java index 24dfa5d..2f0ae26 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java @@ -27,6 +27,8 @@ import org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTable; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinTableContainer; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -35,6 +37,7 @@ import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; @@ -73,11 +76,6 @@ public VectorMapJoinFastTableContainer(MapJoinDesc desc, Configuration hconf, this.estimatedKeyCount = estimatedKeyCount; - // LOG.info("VectorMapJoinFastTableContainer load 
keyCountAdj " + keyCountAdj); - // LOG.info("VectorMapJoinFastTableContainer load threshold " + threshold); - // LOG.info("VectorMapJoinFastTableContainer load loadFactor " + loadFactor); - // LOG.info("VectorMapJoinFastTableContainer load wbSize " + wbSize); - int newThreshold = HashMapWrapper.calculateTableSize( keyCountAdj, threshold, loadFactor, estimatedKeyCount); @@ -93,13 +91,11 @@ public VectorMapJoinHashTable vectorMapJoinHashTable() { private VectorMapJoinFastHashTable createHashTable(int newThreshold) { - boolean isOuterJoin = !desc.isNoOuterJoin(); - - // UNDONE VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc(); HashTableImplementationType hashTableImplementationType = vectorDesc.getHashTableImplementationType(); HashTableKind hashTableKind = vectorDesc.getHashTableKind(); HashTableKeyType hashTableKeyType = vectorDesc.getHashTableKeyType(); + boolean isSaveNullKeyValuesForFullOuter = vectorDesc.getIsSaveNullKeyValuesForFullOuter(); boolean minMaxEnabled = vectorDesc.getMinMaxEnabled(); int writeBufferSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEWBSIZE); @@ -115,18 +111,23 @@ private VectorMapJoinFastHashTable createHashTable(int newThreshold) { switch (hashTableKind) { case HASH_MAP: hashTable = new VectorMapJoinFastLongHashMap( - minMaxEnabled, isOuterJoin, hashTableKeyType, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + minMaxEnabled, + isSaveNullKeyValuesForFullOuter, + hashTableKeyType, + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; case HASH_MULTISET: hashTable = new VectorMapJoinFastLongHashMultiSet( - minMaxEnabled, isOuterJoin, hashTableKeyType, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + minMaxEnabled, + isSaveNullKeyValuesForFullOuter, + hashTableKeyType, + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; case HASH_SET: hashTable = new VectorMapJoinFastLongHashSet( - minMaxEnabled, isOuterJoin, hashTableKeyType, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + minMaxEnabled, + hashTableKeyType, + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; } break; @@ -135,18 +136,17 @@ private VectorMapJoinFastHashTable createHashTable(int newThreshold) { switch (hashTableKind) { case HASH_MAP: hashTable = new VectorMapJoinFastStringHashMap( - isOuterJoin, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + isSaveNullKeyValuesForFullOuter, + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; case HASH_MULTISET: hashTable = new VectorMapJoinFastStringHashMultiSet( - isOuterJoin, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + isSaveNullKeyValuesForFullOuter, + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; case HASH_SET: hashTable = new VectorMapJoinFastStringHashSet( - isOuterJoin, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; } break; @@ -155,18 +155,17 @@ private VectorMapJoinFastHashTable createHashTable(int newThreshold) { switch (hashTableKind) { case HASH_MAP: hashTable = new VectorMapJoinFastMultiKeyHashMap( - isOuterJoin, + isSaveNullKeyValuesForFullOuter, newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; case HASH_MULTISET: hashTable = new VectorMapJoinFastMultiKeyHashMultiSet( - isOuterJoin, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + isSaveNullKeyValuesForFullOuter, + newThreshold, loadFactor, 
writeBufferSize, estimatedKeyCount); break; case HASH_SET: hashTable = new VectorMapJoinFastMultiKeyHashSet( - isOuterJoin, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; } break; @@ -195,6 +194,12 @@ public ReusableGetAdaptor createGetter(MapJoinKey keyTypeFromLoader) { } @Override + public NonMatchedSmallTableIterator createNonMatchedSmallTableIterator( + MatchTracker matchTracker) { + throw new RuntimeException("Not applicable"); + } + + @Override public void clear() { // Do nothing } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinBytesHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinBytesHashMap.java index 2408484..ae057fe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinBytesHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinBytesHashMap.java @@ -21,6 +21,8 @@ import java.io.IOException; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.serde2.WriteBuffers; /* * The interface for a single byte array key hash map lookup method. @@ -41,6 +43,9 @@ * The object to receive small table value(s) information on a MATCH. * Or, for SPILL, it has information on where to spill the big table row. * + * NOTE: Since the hash table can be shared, the hashMapResult serves as the non-shared + * private object for our accessing the hash table lookup values, etc. + * * @return * Whether the lookup was a match, no match, or spill (the partition with the key * is currently spilled). @@ -48,4 +53,28 @@ JoinUtil.JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, VectorMapJoinHashMapResult hashMapResult) throws IOException; + /* + * A version of lookup with match tracking. + * ... + * * @param matchTracker + * Optional key match tracking. + * + * NOTE: Since the hash table can be shared, the matchTracker serves as the non-shared + * private object for tracking our key matches in the hash table. + * ... + */ + JoinUtil.JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, + VectorMapJoinHashMapResult hashMapResult, MatchTracker matchTracker) throws IOException; + + /* + * Lookup a byte array key in the hash map for key match tracking purposes; no result. + * ... + * @param readPos + * @param matchTracker + * NOTE: Since the hash table can be shared, the readPos and matchTracker serve as + * non-shared private objects for looking up and tracking key matches in the hash table. + * ... + */ + void lookupNoResult(byte[] keyBytes, int keyStart, int keyLength, + WriteBuffers.Position readPos, MatchTracker matchTracker) throws IOException; } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMap.java index 2d2490c..5762cff 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMap.java @@ -30,5 +30,4 @@ * access spill information when the partition with the key is currently spilled. 
*/ VectorMapJoinHashMapResult createHashMapResult(); - } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashTable.java index e49da04..820678b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashTable.java @@ -21,6 +21,7 @@ import java.io.IOException; import org.apache.hadoop.hive.common.MemoryEstimate; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.io.BytesWritable; @@ -31,7 +32,6 @@ */ public interface VectorMapJoinHashTable extends MemoryEstimate { - /* * @param currentKey * The current key. @@ -45,4 +45,8 @@ void putRow(BytesWritable currentKey, BytesWritable currentValue) * Get hash table size */ int size(); + + MatchTracker createMatchTracker(); + + VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashMap.java index ba68d35..c70d84a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashMap.java @@ -21,6 +21,8 @@ import java.io.IOException; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.serde2.WriteBuffers; /* * The interface for a single long key hash map lookup method. @@ -43,4 +45,28 @@ */ JoinUtil.JoinResult lookup(long key, VectorMapJoinHashMapResult hashMapResult) throws IOException; + /* + * A version of lookup with match tracking. + * ... + * @param matchTracker + * Optional key match tracking. + * + * NOTE: Since the hash table can be shared, the matchTracker serves as the non-shared + * private object for tracking our key matches in the hash table. + * ... + */ + JoinUtil.JoinResult lookup(long key, VectorMapJoinHashMapResult hashMapResult, + MatchTracker matchTracker) throws IOException; + + /* + * Lookup a long key in the hash map for key match tracking purposes; no result. + * ... + * @param readPos + * @param matchTracker + * NOTE: Since the hash table can be shared, the readPos and matchTracker serve as + * non-shared private objects for looking up and tracking key matches in the hash table. + * ... 
+ */ + void lookupNoResult(long key, WriteBuffers.Position readPos, MatchTracker matchTracker) + throws IOException; } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashTable.java index d0f9dcb..74cfb9c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashTable.java @@ -27,5 +27,4 @@ boolean useMinMax(); long min(); long max(); - } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinNonMatchedIterator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinNonMatchedIterator.java new file mode 100644 index 0000000..f8f4299 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinNonMatchedIterator.java @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable; + +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; + +/* + * The abstract class for vectorized non-match Small Table key iteration. 
+ */ +public abstract class VectorMapJoinNonMatchedIterator { + + protected final MatchTracker matchTracker; + + protected int nonMatchedLogicalSlotNum; + + public VectorMapJoinNonMatchedIterator(MatchTracker matchTracker) { + this.matchTracker = matchTracker; + } + + public void init() { + nonMatchedLogicalSlotNum = -1; + } + + public boolean findNextNonMatched() { + throw new RuntimeException("Not implemented"); + } + + public boolean readNonMatchedLongKey() throws HiveException { + throw new RuntimeException("Not implemented"); + } + + public long getNonMatchedLongKey() throws HiveException { + throw new RuntimeException("Not implemented"); + } + + public boolean readNonMatchedBytesKey() throws HiveException { + throw new RuntimeException("Not implemented"); + } + + public byte[] getNonMatchedBytes() { + throw new RuntimeException("Not implemented"); + } + + public int getNonMatchedBytesOffset() { + throw new RuntimeException("Not implemented"); + } + + public int getNonMatchedBytesLength() { + throw new RuntimeException("Not implemented"); + } + + public VectorMapJoinHashMapResult getNonMatchedHashMapResult() { + throw new RuntimeException("Not implemented"); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java index f95cd76..21c355c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java @@ -116,16 +116,4 @@ public static VectorMapJoinOptimizedHashTable createHashTable(MapJoinDesc desc, } return hashTable; } - - /* - @Override - public com.esotericsoftware.kryo.io.Output getHybridBigTableSpillOutput(int partitionId) { - - HybridHashTableContainer ht = (HybridHashTableContainer) mapJoinTableContainer; - - HashPartition hp = ht.getHashPartitions()[partitionId]; - - return hp.getMatchfileOutput(); - } - */ } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java index 9242702..07279bd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java @@ -21,12 +21,19 @@ import java.io.IOException; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; import org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; +import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.WriteBuffers; import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; public class VectorMapJoinOptimizedHashMap @@ -40,13 +47,18 @@ public VectorMapJoinHashMapResult createHashMapResult() { public static class HashMapResult extends VectorMapJoinHashMapResult { - private BytesBytesMultiHashMap.Result bytesBytesMultiHashMapResult; + private final BytesBytesMultiHashMap.Result bytesBytesMultiHashMapResult; public HashMapResult() { super(); bytesBytesMultiHashMapResult = new BytesBytesMultiHashMap.Result(); } + public HashMapResult(BytesBytesMultiHashMap.Result bytesBytesMultiHashMapResult) { + super(); + this.bytesBytesMultiHashMapResult = bytesBytesMultiHashMapResult; + } + public BytesBytesMultiHashMap.Result bytesBytesMultiHashMapResult() { return bytesBytesMultiHashMapResult; } @@ -106,7 +118,59 @@ public String toString() { public String getDetailedHashMapResultPositionString() { return "(Not supported yet)"; } - } + } + + protected static class NonMatchedBytesHashMapIterator + extends VectorMapJoinOptimizedNonMatchedIterator { + + private VectorMapJoinOptimizedHashMap hashMap; + + protected ByteSegmentRef keyRef; + + public NonMatchedBytesHashMapIterator(MatchTracker matchTracker, + VectorMapJoinOptimizedHashMap hashMap) { + super(matchTracker); + this.hashMap = hashMap; + } + + @Override + public void init() { + super.init(); + nonMatchedIterator = + ((MapJoinBytesTableContainer) hashMap.originalTableContainer). + createNonMatchedSmallTableIterator(matchTracker); + } + + public void doReadNonMatchedBytesKey() throws HiveException { + keyRef = nonMatchedIterator.getCurrentKeyAsRef(); + } + + @Override + public boolean readNonMatchedBytesKey() throws HiveException { + doReadNonMatchedBytesKey(); + return true; // We have not interpreted the bytes, so return true. 
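
Illustrative sketch (not part of the patch): the match-tracking lookup and the non-matched iterator added above are intended to be driven together by the FULL OUTER MapJoin operator, recording Small Table matches while probing with Big Table keys and then walking the Small Table entries that never matched. The Java below is a minimal sketch under that reading; FullOuterProbeSketch and probeAndEmitNonMatched are hypothetical names, only the lookup and iterator signatures come from the interfaces changed in this patch, and it assumes VectorMapJoinBytesHashMap inherits createMatchTracker, createHashMapResult, and createNonMatchedIterator through the VectorMapJoinHashMap/VectorMapJoinHashTable hierarchy.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.JoinUtil;
import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator;
import org.apache.hadoop.hive.ql.metadata.HiveException;

public class FullOuterProbeSketch {

  public static void probeAndEmitNonMatched(VectorMapJoinBytesHashMap hashMap,
      List<byte[]> bigTableKeys) throws IOException, HiveException {

    // The hash table may be shared, so the tracker and result objects are created per caller.
    MatchTracker matchTracker = hashMap.createMatchTracker();
    VectorMapJoinHashMapResult hashMapResult = hashMap.createHashMapResult();

    // Probe phase: every lookup also records which Small Table key matched.
    for (byte[] key : bigTableKeys) {
      JoinUtil.JoinResult joinResult =
          hashMap.lookup(key, 0, key.length, hashMapResult, matchTracker);
      switch (joinResult) {
      case MATCH:
        // ... emit the Big Table row joined with the values in hashMapResult ...
        break;
      case NOMATCH:
        // ... FULL OUTER: emit the Big Table row with NULL Small Table columns ...
        break;
      default:
        // SPILL handling is omitted in this sketch.
        break;
      }
    }

    // Non-matched phase: after the Big Table is exhausted, emit Small-Table-only rows.
    VectorMapJoinNonMatchedIterator nonMatchedIterator =
        hashMap.createNonMatchedIterator(matchTracker);
    nonMatchedIterator.init();
    while (nonMatchedIterator.findNextNonMatched()) {
      if (nonMatchedIterator.readNonMatchedBytesKey()) {
        byte[] keyBytes = nonMatchedIterator.getNonMatchedBytes();
        int keyOffset = nonMatchedIterator.getNonMatchedBytesOffset();
        int keyLength = nonMatchedIterator.getNonMatchedBytesLength();
        VectorMapJoinHashMapResult values = nonMatchedIterator.getNonMatchedHashMapResult();
        // ... emit NULL Big Table columns together with this Small Table key and its values ...
      }
    }
  }
}
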
+ } + + @Override + public byte[] getNonMatchedBytes() { + return keyRef.getBytes(); + } + + @Override + public int getNonMatchedBytesOffset() { + return (int) keyRef.getOffset(); + } + + @Override + public int getNonMatchedBytesLength() { + return keyRef.getLength(); + } + } + + @Override + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + return new NonMatchedBytesHashMapIterator(matchTracker, this); + } @Override public JoinUtil.JoinResult lookup(byte[] keyBytes, int keyOffset, int keyLength, @@ -117,11 +181,32 @@ public String getDetailedHashMapResultPositionString() { JoinUtil.JoinResult joinResult = doLookup(keyBytes, keyOffset, keyLength, implementationHashMapResult.bytesBytesMultiHashMapResult(), - (VectorMapJoinHashTableResult) hashMapResult); + (VectorMapJoinHashTableResult) hashMapResult, null); return joinResult; } + @Override + public JoinUtil.JoinResult lookup(byte[] keyBytes, int keyOffset, int keyLength, + VectorMapJoinHashMapResult hashMapResult, MatchTracker matchTracker) throws IOException { + + HashMapResult implementationHashMapResult = (HashMapResult) hashMapResult; + + JoinUtil.JoinResult joinResult = + doLookup(keyBytes, keyOffset, keyLength, + implementationHashMapResult.bytesBytesMultiHashMapResult(), + (VectorMapJoinHashTableResult) hashMapResult, matchTracker); + + return joinResult; + } + + @Override + public void lookupNoResult(byte[] keyBytes, int keyOffset, int keyLength, + WriteBuffers.Position readPos, MatchTracker matchTracker) throws IOException { + + doLookupNoResult(keyBytes, keyOffset, keyLength, readPos, matchTracker); + } + public VectorMapJoinOptimizedHashMap( MapJoinTableContainer originalTableContainer, ReusableGetAdaptor hashMapRowGetter) { super(originalTableContainer, hashMapRowGetter); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMultiSet.java index 9921a88..cfe128c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMultiSet.java @@ -91,7 +91,7 @@ public void forget() { JoinUtil.JoinResult joinResult = doLookup(keyBytes, keyOffset, keyLength, implementationHashMultiSetResult.bytesBytesMultiHashMapResult(), - (VectorMapJoinHashTableResult) hashMultiSetResult); + (VectorMapJoinHashTableResult) hashMultiSetResult, null); return joinResult; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashSet.java index 122f881..8f53ada 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashSet.java @@ -66,7 +66,7 @@ public void forget() { JoinUtil.JoinResult joinResult = doLookup(keyBytes, keyOffset, keyLength, implementationHashSetResult.bytesBytesMultiHashMapResult(), - (VectorMapJoinHashTableResult) hashSetResult); + (VectorMapJoinHashTableResult) hashSetResult, null); return joinResult; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashTable.java 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashTable.java index 74887f7..c4591f2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashTable.java @@ -25,14 +25,23 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerDirectAccess; import org.apache.hadoop.hive.ql.exec.persistence.ReusableGetAdaptorDirectAccess; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashTable; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTable; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.optimized.VectorMapJoinOptimizedHashMap.NonMatchedBytesHashMapIterator; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; @@ -40,7 +49,8 @@ * Root interface for a vector map join hash table (which could be a hash map, hash multi-set, or * hash set). 
*/ -public abstract class VectorMapJoinOptimizedHashTable implements VectorMapJoinHashTable { +public abstract class VectorMapJoinOptimizedHashTable + implements VectorMapJoinHashTable, VectorMapJoinBytesHashTable { private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinOptimizedMultiKeyHashMap.class.getName()); @@ -55,6 +65,11 @@ } @Override + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + throw new RuntimeException("Not implemented"); + } + + @Override public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws SerDeException, HiveException, IOException { @@ -69,13 +84,13 @@ protected void putRowInternal(BytesWritable key, BytesWritable value) public JoinUtil.JoinResult doLookup(byte[] keyBytes, int keyOffset, int keyLength, BytesBytesMultiHashMap.Result bytesBytesMultiHashMapResult, - VectorMapJoinHashTableResult hashTableResult) { + VectorMapJoinHashTableResult hashTableResult, MatchTracker matchTracker) { hashTableResult.forget(); JoinUtil.JoinResult joinResult = adapatorDirectAccess.setDirect(keyBytes, keyOffset, keyLength, - bytesBytesMultiHashMapResult); + bytesBytesMultiHashMapResult, matchTracker); if (joinResult == JoinUtil.JoinResult.SPILL) { hashTableResult.setSpillPartitionId(adapatorDirectAccess.directSpillPartitionId()); } @@ -85,6 +100,13 @@ protected void putRowInternal(BytesWritable key, BytesWritable value) return joinResult; } + public void doLookupNoResult(byte[] keyBytes, int keyOffset, int keyLength, + WriteBuffers.Position readPos, MatchTracker matchTracker) { + + adapatorDirectAccess.setDirectNoResult( + keyBytes, keyOffset, keyLength, readPos, matchTracker); +} + public VectorMapJoinOptimizedHashTable( MapJoinTableContainer originalTableContainer, ReusableGetAdaptor hashMapRowGetter) { @@ -105,4 +127,9 @@ public long getEstimatedMemorySize() { size += (2 * JavaDataModel.get().object()); return size; } + + @Override + public MatchTracker createMatchTracker() { + return adapatorDirectAccess.createMatchTracker(); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java index 9c45ed9..de1ee15 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java @@ -41,8 +41,6 @@ private HashTableKeyType hashTableKeyType; - // private BinarySortableDeserializeRead keyBinarySortableDeserializeRead; - private BinarySortableSerializeWrite keyBinarySortableSerializeWrite; private transient Output output; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java index b21f0b3..7f37e99 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java @@ -21,11 +21,20 @@ import java.io.IOException; import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; import 
org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; /* * An single long value hash map based on the BytesBytesMultiHashMap. @@ -37,8 +46,104 @@ extends VectorMapJoinOptimizedHashMap implements VectorMapJoinLongHashMap { + private HashTableKeyType hashTableKeyType; + private VectorMapJoinOptimizedLongCommon longCommon; + private static class NonMatchedLongHashMapIterator + extends VectorMapJoinOptimizedNonMatchedIterator { + + private VectorMapJoinOptimizedLongHashMap hashMap; + + // Extract long with non-shared deserializer object. + private BinarySortableDeserializeRead keyBinarySortableDeserializeRead; + + private long longValue; + + public NonMatchedLongHashMapIterator(MatchTracker matchTracker, + VectorMapJoinOptimizedLongHashMap hashMap) { + super(matchTracker); + this.hashMap = hashMap; + } + + @Override + public void init() { + super.init(); + nonMatchedIterator = + ((MapJoinBytesTableContainer) hashMap.originalTableContainer). + createNonMatchedSmallTableIterator(matchTracker); + + TypeInfo integerTypeInfo; + switch (hashMap.hashTableKeyType) { + case BOOLEAN: + integerTypeInfo = TypeInfoFactory.booleanTypeInfo; + break; + case BYTE: + integerTypeInfo = TypeInfoFactory.byteTypeInfo; + break; + case SHORT: + integerTypeInfo = TypeInfoFactory.shortTypeInfo; + break; + case INT: + integerTypeInfo = TypeInfoFactory.intTypeInfo; + break; + case LONG: + integerTypeInfo = TypeInfoFactory.longTypeInfo; + break; + default: + throw new RuntimeException("Unexpected key type " + hashMap.hashTableKeyType); + } + keyBinarySortableDeserializeRead = + new BinarySortableDeserializeRead( + new TypeInfo[] {integerTypeInfo}, false); + } + + private boolean readNonMatchedLongKey(ByteSegmentRef keyRef) throws HiveException { + + try { + byte[] keyBytes = keyRef.getBytes(); + int keyOffset = (int) keyRef.getOffset(); + int keyLength = keyRef.getLength(); + keyBinarySortableDeserializeRead.set(keyBytes, keyOffset, keyLength); + if (!keyBinarySortableDeserializeRead.readNextField()) { + return false; + } + switch (hashMap.hashTableKeyType) { + case BOOLEAN: + longValue = keyBinarySortableDeserializeRead.currentBoolean ? 
1 : 0; + break; + case BYTE: + longValue = keyBinarySortableDeserializeRead.currentByte; + break; + case SHORT: + longValue = keyBinarySortableDeserializeRead.currentShort; + break; + case INT: + longValue = keyBinarySortableDeserializeRead.currentInt; + break; + case LONG: + longValue = keyBinarySortableDeserializeRead.currentLong; + break; + default: + throw new RuntimeException("Unexpected key type " + hashMap.hashTableKeyType); + } + } catch (IOException e) { + throw new HiveException(e); + } + return true; + } + + @Override + public boolean readNonMatchedLongKey() throws HiveException { + return readNonMatchedLongKey(nonMatchedIterator.getCurrentKeyAsRef()); + } + + @Override + public long getNonMatchedLongKey() throws HiveException { + return longValue; + } + } + @Override public boolean useMinMax() { return longCommon.useMinMax(); @@ -54,14 +159,10 @@ public long max() { return longCommon.max(); } - /* @Override - public void putRow(BytesWritable currentKey, BytesWritable currentValue) - throws SerDeException, HiveException, IOException { - - longCommon.adaptPutRow((VectorMapJoinOptimizedHashTable) this, currentKey, currentValue); + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + return new NonMatchedLongHashMapIterator(matchTracker, this); } - */ @Override public JoinResult lookup(long key, @@ -73,10 +174,31 @@ public JoinResult lookup(long key, hashMapResult); } + @Override + public JoinResult lookup(long key, + VectorMapJoinHashMapResult hashMapResult, MatchTracker matchTracker) throws IOException { + + SerializedBytes serializedBytes = longCommon.serialize(key); + + return super.lookup(serializedBytes.bytes, serializedBytes.offset, serializedBytes.length, + hashMapResult, matchTracker); + } + + @Override + public void lookupNoResult(long key, WriteBuffers.Position readPos, + MatchTracker matchTracker) throws IOException { + + SerializedBytes serializedBytes = longCommon.serialize(key); + + super.lookupNoResult(serializedBytes.bytes, serializedBytes.offset, serializedBytes.length, + readPos, matchTracker); + } + public VectorMapJoinOptimizedLongHashMap( boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, MapJoinTableContainer originalTableContainer, ReusableGetAdaptor hashMapRowGetter) { super(originalTableContainer, hashMapRowGetter); + this.hashTableKeyType = hashTableKeyType; longCommon = new VectorMapJoinOptimizedLongCommon(minMaxEnabled, isOuterJoin, hashTableKeyType); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedMultiKeyHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedMultiKeyHashMap.java index 3e8e6fb..e07bbaa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedMultiKeyHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedMultiKeyHashMap.java @@ -27,8 +27,6 @@ public class VectorMapJoinOptimizedMultiKeyHashMap extends VectorMapJoinOptimizedHashMap { - // UNDONE: How to look for all NULLs in a multi-key????? Let nulls through for now. 
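
The long-keyed lookups and iterator shown above follow the same pattern. The sketch below (again illustrative, with hypothetical class and method names) shows the two probe styles added for long keys: a full lookup that returns values and records the match, and lookupNoResult, which only marks the Small Table key as matched when no join output is needed, followed by reading the non-matched long keys back through the iterator. It assumes VectorMapJoinLongHashMap exposes createNonMatchedIterator through the VectorMapJoinHashTable hierarchy, and that the per-caller WriteBuffers.Position can be constructed directly, mirroring how matchTracker and hashMapResult are kept non-shared.

import java.io.IOException;

import org.apache.hadoop.hive.ql.exec.JoinUtil;
import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.WriteBuffers;

public class FullOuterLongKeySketch {

  // Probe that materializes values and records the match in matchTracker.
  public static JoinUtil.JoinResult probe(VectorMapJoinLongHashMap hashMap, long key,
      VectorMapJoinHashMapResult hashMapResult, MatchTracker matchTracker) throws IOException {
    return hashMap.lookup(key, hashMapResult, matchTracker);
  }

  // Tracking-only probe: marks matched keys without fetching values.
  public static void trackOnly(VectorMapJoinLongHashMap hashMap, long[] keys,
      MatchTracker matchTracker) throws IOException {
    // Assumed per-caller read position object, kept non-shared like the tracker itself.
    WriteBuffers.Position readPos = new WriteBuffers.Position();
    for (long key : keys) {
      hashMap.lookupNoResult(key, readPos, matchTracker);
    }
  }

  // After the Big Table is exhausted, the non-matched long keys are read back.
  public static void emitNonMatchedLongKeys(VectorMapJoinLongHashMap hashMap,
      MatchTracker matchTracker) throws HiveException {
    VectorMapJoinNonMatchedIterator it = hashMap.createNonMatchedIterator(matchTracker);
    it.init();
    while (it.findNextNonMatched()) {
      if (it.readNonMatchedLongKey()) {
        long smallTableKey = it.getNonMatchedLongKey();
        // ... emit NULL Big Table columns for smallTableKey and its values ...
      }
    }
  }
}
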
- public VectorMapJoinOptimizedMultiKeyHashMap(boolean isOuterJoin, MapJoinTableContainer originalTableContainer, ReusableGetAdaptor hashMapRowGetter) { super(originalTableContainer, hashMapRowGetter); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedNonMatchedIterator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedNonMatchedIterator.java new file mode 100644 index 0000000..694a8c9 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedNonMatchedIterator.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.optimized; + +import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.optimized.VectorMapJoinOptimizedHashMap.HashMapResult; + +/* + * The abstract class for vectorized non-match Small Table key iteration. 
+ */ +public abstract class VectorMapJoinOptimizedNonMatchedIterator + extends VectorMapJoinNonMatchedIterator { + + protected NonMatchedSmallTableIterator nonMatchedIterator; + + protected HashMapResult nonMatchedHashMapResult; + + public VectorMapJoinOptimizedNonMatchedIterator(MatchTracker matchTracker) { + super(matchTracker); + } + + @Override + public boolean findNextNonMatched() { + return nonMatchedIterator.isNext(); + } + + @Override + public VectorMapJoinHashMapResult getNonMatchedHashMapResult() { + if (nonMatchedHashMapResult == null) { + nonMatchedHashMapResult = new HashMapResult(nonMatchedIterator.getHashMapResult()); + } + nonMatchedHashMapResult.setJoinResult(JoinResult.MATCH); + return nonMatchedHashMapResult; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringCommon.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringCommon.java index a8ccfa4..d3e12e0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringCommon.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringCommon.java @@ -21,24 +21,19 @@ import java.io.IOException; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.optimized.VectorMapJoinOptimizedHashTable.SerializedBytes; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; /* * An single byte array value hash map based on the BytesBytesMultiHashMap. - * - * Since BytesBytesMultiHashMap does not interpret the key as BinarySortable we optimize - * this case and just reference the byte array key directly for the lookup instead of serializing - * the byte array into BinarySortable. We rely on it just doing byte array equality comparisons. 
*/ public class VectorMapJoinOptimizedStringCommon { - // private boolean isOuterJoin; - - // private BinarySortableDeserializeRead keyBinarySortableDeserializeRead; - - // private ReadStringResults readStringResults; - private BinarySortableSerializeWrite keyBinarySortableSerializeWrite; private transient Output output; @@ -55,15 +50,10 @@ public SerializedBytes serialize(byte[] keyBytes, int keyStart, int keyLength) t serializedBytes.length = output.getLength(); return serializedBytes; - } public VectorMapJoinOptimizedStringCommon(boolean isOuterJoin) { - // this.isOuterJoin = isOuterJoin; - // PrimitiveTypeInfo[] primitiveTypeInfos = { TypeInfoFactory.stringTypeInfo }; - // keyBinarySortableDeserializeRead = new BinarySortableDeserializeRead(primitiveTypeInfos); - // readStringResults = keyBinarySortableDeserializeRead.createReadStringResults(); - // bytesWritable = new BytesWritable(); + keyBinarySortableSerializeWrite = new BinarySortableSerializeWrite(1); output = new Output(); keyBinarySortableSerializeWrite.set(output); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashMap.java index f2074ec..b822005 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashMap.java @@ -22,12 +22,19 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; /* - * An multi-key hash map based on the BytesBytesMultiHashMap. + * An string hash map based on the BytesBytesMultiHashMap. 
*/ public class VectorMapJoinOptimizedStringHashMap extends VectorMapJoinOptimizedHashMap @@ -35,14 +42,59 @@ private VectorMapJoinOptimizedStringCommon stringCommon; - /* - @Override - public void putRow(BytesWritable currentKey, BytesWritable currentValue) - throws SerDeException, HiveException, IOException { + private static class NonMatchedStringHashMapIterator extends NonMatchedBytesHashMapIterator { + + private BinarySortableDeserializeRead keyBinarySortableDeserializeRead; + + public NonMatchedStringHashMapIterator(MatchTracker matchTracker, + VectorMapJoinOptimizedStringHashMap hashMap) { + super(matchTracker, hashMap); + } + + @Override + public void init() { + super.init(); + + TypeInfo[] typeInfos = new TypeInfo[] { TypeInfoFactory.stringTypeInfo }; + keyBinarySortableDeserializeRead = + new BinarySortableDeserializeRead(typeInfos, /* useExternalBuffer */ false); + } + + @Override + public boolean readNonMatchedBytesKey() throws HiveException { + super.doReadNonMatchedBytesKey(); + + byte[] bytes = keyRef.getBytes(); + final int keyOffset = (int) keyRef.getOffset(); + final int keyLength = keyRef.getLength(); + try { + keyBinarySortableDeserializeRead.set(bytes, keyOffset, keyLength); + return keyBinarySortableDeserializeRead.readNextField(); + } catch (IOException e) { + throw new HiveException(e); + } + } - stringCommon.adaptPutRow((VectorMapJoinOptimizedHashTable) this, currentKey, currentValue); + @Override + public byte[] getNonMatchedBytes() { + return keyBinarySortableDeserializeRead.currentBytes; + } + + @Override + public int getNonMatchedBytesOffset() { + return keyBinarySortableDeserializeRead.currentBytesStart; + } + + @Override + public int getNonMatchedBytesLength() { + return keyBinarySortableDeserializeRead.currentBytesLength; + } + } + + @Override + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + return new NonMatchedStringHashMapIterator(matchTracker, this); } - */ @Override public JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, @@ -55,6 +107,27 @@ public JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, } + @Override + public JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, + VectorMapJoinHashMapResult hashMapResult, MatchTracker matchTracker) throws IOException { + + SerializedBytes serializedBytes = stringCommon.serialize(keyBytes, keyStart, keyLength); + + return super.lookup(serializedBytes.bytes, serializedBytes.offset, serializedBytes.length, + hashMapResult, matchTracker); + + } + + @Override + public void lookupNoResult(byte[] keyBytes, int keyOffset, int keyLength, + WriteBuffers.Position readPos, MatchTracker matchTracker) throws IOException { + + SerializedBytes serializedBytes = stringCommon.serialize(keyBytes, keyOffset, keyLength); + + doLookupNoResult(serializedBytes.bytes, serializedBytes.offset, serializedBytes.length, + readPos, matchTracker); + } + public VectorMapJoinOptimizedStringHashMap(boolean isOuterJoin, MapJoinTableContainer originalTableContainer, ReusableGetAdaptor hashMapRowGetter) { super(originalTableContainer, hashMapRowGetter); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashMultiSet.java index a6b754c..bb2fd03 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashMultiSet.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashMultiSet.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMultiSet; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult; +import org.apache.hadoop.hive.ql.metadata.HiveException; /* * An multi-key hash map based on the BytesBytesMultiHashMultiSet. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashSet.java index fdcd83d..229dadf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashSet.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashSet; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult; +import org.apache.hadoop.hive.ql.metadata.HiveException; /* * An multi-key hash map based on the BytesBytesMultiHashSet. diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index a235f3f..7873a8a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -19,21 +19,26 @@ package org.apache.hadoop.hive.ql.optimizer; import java.util.ArrayList; +import java.util.Arrays; +import java.util.EnumSet; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.Stack; +import java.util.TreeMap; import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator; import org.apache.hadoop.hive.ql.exec.DummyStoreOperator; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; +import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; @@ -43,14 +48,23 @@ import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.OperatorUtils; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator; +import org.apache.hadoop.hive.ql.exec.UnionOperator; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinBaseOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinBaseOperator.MapJoinBigTableInfo; +import org.apache.hadoop.hive.ql.io.AcidUtils.Operation; import 
org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.optimizer.physical.LlapClusterStateForCompile; +import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer; import org.apache.hadoop.hive.ql.parse.GenTezUtils; +import org.apache.hadoop.hive.ql.parse.JoinType; import org.apache.hadoop.hive.ql.parse.OptimizeTezProcContext; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -59,13 +73,21 @@ import org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.FilterDesc; import org.apache.hadoop.hive.ql.plan.JoinCondDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.OpTraits; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.PlanUtils; +import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; +import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits; +import org.apache.hadoop.hive.ql.plan.SelectDesc; import org.apache.hadoop.hive.ql.plan.Statistics; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.plan.UnionDesc; import org.apache.hadoop.hive.ql.stats.StatsUtils; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.util.ReflectionUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -83,7 +105,6 @@ private static final Logger LOG = LoggerFactory.getLogger(ConvertJoinMapJoin.class.getName()); - @Override /* * (non-Javadoc) we should ideally not modify the tree we traverse. However, @@ -119,6 +140,7 @@ boolean hiveConvertJoin = context.conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN) & !context.parseContext.getDisableMapJoin(); if (!hiveConvertJoin) { + // we are just converting to a common merge join operator. The shuffle // join in map-reduce case. 
Object retval = checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx, maxSize); @@ -182,6 +204,15 @@ // reduced by 1 mapJoinOp.setOpTraits(new OpTraits(null, -1, null, joinOp.getOpTraits().getNumReduceSinks())); mapJoinOp.setStatistics(joinOp.getStatistics()); + + JoinCondDesc[] conds = joinOp.getConf().getConds(); + if (conds.length == 1 && conds[0].getType() == JoinDesc.FULL_OUTER_JOIN) { + MapJoinDesc mapJoinDesc = mapJoinOp.getConf(); + if (!mapJoinDesc.isDynamicPartitionHashJoin()) { + FullOuterMapJoinOptimization.generateSharedMemoryPlan(mapJoinOp); + } + } + // propagate this change till the next RS for (Operator childOp : mapJoinOp.getChildOperators()) { setAllChildrenTraits(childOp, mapJoinOp.getOpTraits()); @@ -1225,8 +1256,9 @@ private boolean checkNumberOfEntriesForHashTable(JoinOperator joinOp, int positi List columnStats = new ArrayList<>(); for (String key : keys) { ColStatistics cs = inputStats.getColumnStatisticsFromColName(key); + LOG.debug("Statistics obtained for {} of reduce sink operator {}: {}", + key, rsOp.toString(), (cs != null)); if (cs == null) { - LOG.debug("Couldn't get statistics for: {}", key); return true; } columnStats.add(cs); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java new file mode 100644 index 0000000..8d61986 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java @@ -0,0 +1,341 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.optimizer; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.RowSchema; +import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.exec.UnionOperator; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinBaseOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinBaseOperator.MapJoinBigTableInfo; +import org.apache.hadoop.hive.ql.io.AcidUtils.Operation; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.MapJoinDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.PlanUtils; +import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; +import org.apache.hadoop.hive.ql.plan.SelectDesc; +import org.apache.hadoop.hive.ql.plan.UnionDesc; + +import com.google.common.base.Preconditions; + +public class FullOuterMapJoinOptimization { + + private static ReduceSinkOperator addIntersectSmallTableReduceSink( + MapJoinOperator mapJoinOp, int posSmallTable) + throws SemanticException { + + List> mapJoinParents = mapJoinOp.getParentOperators(); + Preconditions.checkState(mapJoinParents.get(posSmallTable) instanceof ReduceSinkOperator); + + ReduceSinkOperator smallTableReduceSink = + (ReduceSinkOperator) mapJoinOp.getParentOperators().get(posSmallTable); + + List> smallTableReduceSinkParents = + smallTableReduceSink.getParentOperators(); + Preconditions.checkState(smallTableReduceSinkParents.size() == 1); + + Operator smallTableReduceSinkParent = + smallTableReduceSinkParents.get(0); + + ReduceSinkDesc intersectSmallTableReduceSinkDesc = + (ReduceSinkDesc) smallTableReduceSink.getConf().clone(); + intersectSmallTableReduceSinkDesc.setPartitionCols(new ArrayList()); + intersectSmallTableReduceSinkDesc.setNumReducers(1); + intersectSmallTableReduceSinkDesc.setOutputName("intersect"); + + ReduceSinkOperator intersectSmallTableReduceSink = + (ReduceSinkOperator) OperatorFactory.get( + smallTableReduceSink.getCompilationOpContext(), + intersectSmallTableReduceSinkDesc); + intersectSmallTableReduceSink.setColumnExprMap(new HashMap()); + + // Connect smallTableReduceSinkParent and intersectSmallTableReduceSink. 
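As a hedged aside on the operator wiring done in this new class: Hive operator DAGs are doubly linked, so every edge has to be recorded in both the parent's child list and the child's parent list, which is why each connection below updates getChildOperators() and getParentOperators() in pairs. A minimal, hypothetical helper capturing that pattern (the name connect is illustrative and not part of the patch):

    // Illustrative sketch only: record an operator edge on both endpoints,
    // mirroring the explicit child/parent list updates made in this class.
    private static void connect(Operator<? extends OperatorDesc> parent,
        Operator<? extends OperatorDesc> child) {
      parent.getChildOperators().add(child);
      child.getParentOperators().add(parent);
    }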
+ smallTableReduceSinkParent.getChildOperators().add(intersectSmallTableReduceSink); + intersectSmallTableReduceSink.getParentOperators().add(smallTableReduceSinkParent); + + return intersectSmallTableReduceSink; + } + + private static MapJoinOperator createIntersectMapJoin( + ReduceSinkOperator intersectSmallTableReduceSink, + List> intersectMapJoinParents, + MapJoinOperator mapJoinOp, MapJoinDesc mapJoinDesc, int posSmallTable) + throws SemanticException{ + + MapJoinDesc interceptMapJoinDesc = new MapJoinDesc(mapJoinDesc); + interceptMapJoinDesc.setFullOuterIntersect(true); + + MapJoinOperator intersectMapJoinOp = + (MapJoinOperator) OperatorFactory.get( + mapJoinOp.getCompilationOpContext(), + interceptMapJoinDesc); + + // Make intersectMapJoin a child of intersectSmallTableReduceSink. + intersectSmallTableReduceSink.getChildOperators().add(intersectMapJoinOp); + + intersectMapJoinParents.set(posSmallTable, intersectSmallTableReduceSink); + + return intersectMapJoinOp; + } + + private static ReduceSinkOperator addAuxiliaryReduceSink( + MapJoinOperator mapJoinOp, MapJoinDesc mapJoinDesc, + MapJoinBigTableInfo mapJoinBigTableInfo) + throws SemanticException { + + // Get the column names of the aggregations for reduce sink + List mapJoinOutputNames = mapJoinOp.getConf().getOutputColumnNames(); + + ArrayList mapJoinSignature = mapJoinOp.getSchema().getSignature(); + + ArrayList auxiliaryReduceSinkKeyExprs = new ArrayList(); + int[] bigTableOutputKeyColumnNums = mapJoinBigTableInfo.getOutputKeyColumnNums(); + final int bigTableOutputKeySize = bigTableOutputKeyColumnNums.length; + for (int i = 0; i < bigTableOutputKeySize; i++) { + final int bigTableOutputKeyColumnNum = bigTableOutputKeyColumnNums[i]; + ExprNodeColumnDesc colExpr = + new ExprNodeColumnDesc( + mapJoinSignature.get(bigTableOutputKeyColumnNum).getType(), + mapJoinOutputNames.get(bigTableOutputKeyColumnNum), "", false); + auxiliaryReduceSinkKeyExprs.add(colExpr); + } + + ArrayList auxiliaryReduceSinkValueExprs = new ArrayList(); + List auxiliaryValueOutputColumnNames = new ArrayList(); + int[] bigTableOutputValueColumnNums = mapJoinBigTableInfo.getOutputValueColumnNums(); + final int bigTableOutputValueSize = bigTableOutputValueColumnNums.length; + for (int i = 0; i < bigTableOutputValueSize; i++) { + final int bigTableOutputValueColumnNum = bigTableOutputValueColumnNums[i]; + ExprNodeColumnDesc colExpr = + new ExprNodeColumnDesc( + mapJoinSignature.get(bigTableOutputValueColumnNum).getType(), + mapJoinOutputNames.get(bigTableOutputValueColumnNum), "", false); + auxiliaryReduceSinkValueExprs.add(colExpr); + auxiliaryValueOutputColumnNames.add("_col" + i); + } + + ReduceSinkDesc auxiliaryReduceSinkDesc = + PlanUtils.getReduceSinkDesc( + auxiliaryReduceSinkKeyExprs, + auxiliaryReduceSinkValueExprs, + auxiliaryValueOutputColumnNames, + false, -1, 0, 1, Operation.NOT_ACID); + auxiliaryReduceSinkDesc.setPartitionCols(new ArrayList()); + auxiliaryReduceSinkDesc.setNumReducers(1); + auxiliaryReduceSinkDesc.setOutputName("auxiliaryIntersect"); + + ReduceSinkOperator auxiliaryReduceSink = + (ReduceSinkOperator) OperatorFactory.getAndMakeChild( + auxiliaryReduceSinkDesc, + new RowSchema(mapJoinOp.getSchema()), + mapJoinOp); + auxiliaryReduceSink.setColumnExprMap(new HashMap()); + + return auxiliaryReduceSink; + } + + private static SelectOperator addRenameSelect( + ReduceSinkOperator auxiliaryReduceSink, + MapJoinBigTableInfo mapJoinBigTableInfo) + throws SemanticException{ + + ReduceSinkDesc auxiliaryReduceSinkDesc = 
auxiliaryReduceSink.getConf(); + + // A rename SELECT that maps column names... + Map renameSelectColNameToExprMap = new HashMap(); + Map renameSelectColNumToExprMap = new TreeMap(); + + ArrayList auxiliarReduceSinkKeyCols = auxiliaryReduceSinkDesc.getKeyCols(); + List auxiliarReduceSinkOutputKeyColumnNames = + auxiliaryReduceSinkDesc.getOutputKeyColumnNames(); + int[] bigTableInputKeyColumnMap = mapJoinBigTableInfo.getInputKeyColumnMap(); + final int renameKeySize = auxiliarReduceSinkKeyCols.size(); + int columnNum = 0; + String keyPrefix = Utilities.ReduceField.KEY.name() + "."; + for (int i = 0; i < renameKeySize; i++) { + String inputColumnName = keyPrefix + auxiliarReduceSinkOutputKeyColumnNames.get(i); + ExprNodeColumnDesc keyColExpr = (ExprNodeColumnDesc) auxiliarReduceSinkKeyCols.get(i); + ExprNodeColumnDesc replaceColExpr = + new ExprNodeColumnDesc( + keyColExpr.getTypeInfo(), + inputColumnName, "", false); + renameSelectColNameToExprMap.put(inputColumnName, replaceColExpr); + + final int inputColumnNum = bigTableInputKeyColumnMap[i]; + renameSelectColNumToExprMap.put(inputColumnNum, replaceColExpr); + } + + ArrayList auxiliarReduceSinkValueCols = auxiliaryReduceSinkDesc.getValueCols(); + List auxiliarReduceSinkOutputValueColumnNames = + auxiliaryReduceSinkDesc.getOutputValueColumnNames(); + int[] bigTableInputValueColumnMap = mapJoinBigTableInfo.getInputValueColumnMap(); + final int renameValueSize = auxiliarReduceSinkValueCols.size(); + String valuePrefix = Utilities.ReduceField.VALUE.name() + "."; + for (int i = 0; i < renameValueSize; i++) { + String inputColumnName = valuePrefix + auxiliarReduceSinkOutputValueColumnNames.get(i); + ExprNodeColumnDesc valueColExpr = (ExprNodeColumnDesc) auxiliarReduceSinkValueCols.get(i); + ExprNodeColumnDesc replaceColExpr = + new ExprNodeColumnDesc( + valueColExpr.getTypeInfo(), + inputColumnName, "", false); + renameSelectColNameToExprMap.put(inputColumnName, replaceColExpr); + + final int inputColumnNum = bigTableInputValueColumnMap[i]; + renameSelectColNumToExprMap.put(inputColumnNum, replaceColExpr); + } + + List renameSelectColExprs = new ArrayList(); + renameSelectColExprs.addAll(renameSelectColNumToExprMap.values()); + + List renameSelectOutputColumnNames = new ArrayList(); + ArrayList renameSelectColumnInfo = new ArrayList(); + + final int renameSelectSize = renameSelectColExprs.size(); + for (int i = 0; i < renameSelectSize; i++) { + String outputColumnName = "_col" + i; + renameSelectOutputColumnNames.add(outputColumnName); + ColumnInfo colInfo = + new ColumnInfo( + outputColumnName, + renameSelectColExprs.get(i).getTypeInfo(), + "", false); + renameSelectColumnInfo.add(colInfo); + } + + SelectDesc renameSelectDesc = + new SelectDesc( + renameSelectColExprs, + renameSelectOutputColumnNames); + + SelectOperator renameSelect = + (SelectOperator) OperatorFactory.get( + auxiliaryReduceSink.getCompilationOpContext(), + renameSelectDesc); + renameSelect.setSchema(new RowSchema(renameSelectColumnInfo)); + renameSelect.setColumnExprMap(renameSelectColNameToExprMap); + + return renameSelect; + } + + public static void generateSharedMemoryPlan(MapJoinOperator mapJoinOp) + throws SemanticException { + + MapJoinDesc mapJoinDesc = mapJoinOp.getConf(); + + // UNDONE: Why is this necessary? + mapJoinDesc.setFilterMap(null); + + int posBigTable = mapJoinDesc.getPosBigTable(); + int posSmallTable = (posBigTable == 0 ? 1 : 0); + + /* + * Add ReduceSink for sending Small Table to INTERSECT MapJoin. 
+ */ + ReduceSinkOperator intersectSmallTableReduceSink = + addIntersectSmallTableReduceSink(mapJoinOp, posSmallTable); + + /* + * Create FULL OUTER INTERSECT MapJoin. + */ + + // Get ready to set the FULL OUTER INTERSECT MapJoin parents. + List> intersectMapJoinParents = + new ArrayList>(); + intersectMapJoinParents.add(null); + intersectMapJoinParents.add(null); + + MapJoinOperator intersectMapJoinOp = + createIntersectMapJoin( + intersectSmallTableReduceSink, + intersectMapJoinParents, + mapJoinOp, mapJoinDesc, posSmallTable); + + /* + * Create auxiliary ReduceSink that sends first-time key matches from FULL OUTER MapJoin to + * FULL OUTER INTERSECT MapJoin. + */ + MapJoinBigTableInfo mapJoinBigTableInfo = + VectorMapJoinBaseOperator.getBigTableInfo(mapJoinDesc); + + ReduceSinkOperator auxiliaryReduceSink = + addAuxiliaryReduceSink(mapJoinOp, mapJoinDesc, mapJoinBigTableInfo); + + ReduceSinkDesc auxiliaryReduceSinkDesc = auxiliaryReduceSink.getConf(); + + /* + * Add a SELECT to rename the Reduce-Shuffle column names to '_colN'... + */ + SelectOperator renameSelect = + addRenameSelect( + auxiliaryReduceSink, + mapJoinBigTableInfo); + + /* + * Connect the new operators. + */ + auxiliaryReduceSink.getChildOperators().add(renameSelect); + renameSelect.getParentOperators().add(auxiliaryReduceSink); + + renameSelect.getChildOperators().add(intersectMapJoinOp); + + intersectMapJoinParents.set(posBigTable, renameSelect); + intersectMapJoinOp.setParentOperators(intersectMapJoinParents); + + /* + * Add the special UNION operator to combine the output of the FULL OUTER MapJoin and + * FULL OUTER INTERSECT MapJoin operators. + */ + + // Detach child below MapJoin. + Operator mapJoinChild = mapJoinOp.getChildOperators().get(0); + mapJoinOp.setChildOperators(new ArrayList>()); + mapJoinChild.setParentOperators(new ArrayList>()); + + ArrayList> unionParents = + new ArrayList>(); + unionParents.add(mapJoinOp); + unionParents.add(intersectMapJoinOp); + + UnionOperator unionOp = + (UnionOperator) OperatorFactory.getAndMakeChild( + auxiliaryReduceSink.getCompilationOpContext(), + new UnionDesc(), + new RowSchema(mapJoinOp.getSchema().getSignature()), + unionParents); + + unionOp.getChildOperators().add(mapJoinChild); + mapJoinChild.getParentOperators().add(unionOp); + + mapJoinOp.getChildOperators().add(auxiliaryReduceSink); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java index 275a31f..1077b70 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java @@ -515,15 +515,9 @@ public MapJoinOperator generateMapJoinOperator(ParseContext pctx, JoinOperator o seenPostitions.add(condn.getLeft()); seenPostitions.add(condn.getRight()); - if (joinType == JoinDesc.FULL_OUTER_JOIN) { - // setting these 2 parameters here just in case that if the code got - // changed in future, these 2 are not missing.
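For orientation, a rough, non-authoritative summary of the operator graph that generateSharedMemoryPlan (in the new file above) leaves behind for a two-table FULL OUTER join, derived from its wiring code:

    Small Table parent           --> intersectSmallTableReduceSink --> FULL OUTER INTERSECT MapJoin
    FULL OUTER MapJoin           --> auxiliaryReduceSink --> renameSelect --> FULL OUTER INTERSECT MapJoin
    FULL OUTER MapJoin           --> UNION
    FULL OUTER INTERSECT MapJoin --> UNION
    UNION                        --> original child of the FULL OUTER MapJoin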
- seenOuterJoin = true; - lastSeenRightOuterJoin = false; - // empty set - cannot convert - return new HashSet(); - } else if (joinType == JoinDesc.LEFT_OUTER_JOIN - || joinType == JoinDesc.LEFT_SEMI_JOIN) { + if (joinType == JoinDesc.LEFT_OUTER_JOIN || + joinType == JoinDesc.LEFT_SEMI_JOIN || + joinType == JoinDesc.FULL_OUTER_JOIN) { seenOuterJoin = true; if(bigTableCandidates.size() == 0) { bigTableCandidates.add(condn.getLeft()); @@ -1044,6 +1038,7 @@ public static MapJoinDesc getMapJoinDesc(HiveConf hconf, JoinCondDesc[] condns = desc.getConds(); Byte[] tagOrder = desc.getTagOrder(); + // UNDONE: Fix this comment... // outer join cannot be performed on a table which is being cached if (!noCheckOuterJoin) { if (checkMapJoin(mapJoinPos, condns) < 0) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 13a2fc4..35a7251 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -67,6 +67,12 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterLongOperator; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterMultiKeyOperator; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinFullOuterLongOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinFullOuterMultiKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinFullOuterStringOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinFullOuterIntersectLongOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinFullOuterIntersectMultiKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinFullOuterIntersectStringOperator; import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator; import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkEmptyKeyOperator; import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkLongOperator; @@ -121,6 +127,8 @@ import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredWork; +import org.apache.hadoop.hive.ql.plan.MergeJoinWork; +import org.apache.hadoop.hive.ql.plan.OpTraits; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PTFDesc; import org.apache.hadoop.hive.ql.plan.SelectDesc; @@ -709,10 +717,61 @@ public VectorDesc getVectorDesc() { } } - private List> newOperatorList() { + private static List> newOperatorList() { return new ArrayList>(); } + public static void debugDisplayJoinOperatorTree(Operator joinOperator, + String prefix) { + List> currentParentList = newOperatorList(); + currentParentList.add(joinOperator); + + int depth = 0; + do { + List> nextParentList = newOperatorList(); + + final int count = currentParentList.size(); + for (int i = 0; i < count; i++) { + Operator parent = currentParentList.get(i); + System.out.println(prefix + " parent depth " + depth + " " + parent.getClass().getSimpleName() + " " + parent.toString()); + + List> parentList = parent.getParentOperators(); + if (parentList == null || parentList.size() == 0) { + continue; + } + + nextParentList.addAll(parentList); + } + + currentParentList = nextParentList; + depth--; + } while (currentParentList.size() > 
0); + + List> currentChildList = newOperatorList(); + currentChildList.addAll(joinOperator.getChildOperators()); + + depth = 1; + do { + List> nextChildList = newOperatorList(); + + final int count = currentChildList.size(); + for (int i = 0; i < count; i++) { + Operator child = currentChildList.get(i); + System.out.println(prefix + " child depth " + depth + " " + child.getClass().getSimpleName() + " " + child.toString()); + + List> childList = child.getChildOperators(); + if (childList == null || childList.size() == 0) { + continue; + } + + nextChildList.addAll(childList); + } + + currentChildList = nextChildList; + depth--; + } while (currentChildList.size() > 0); + } + private Operator validateAndVectorizeOperatorTree( Operator nonVecRootOperator, boolean isReduce, boolean isTezOrSpark, @@ -946,6 +1005,11 @@ public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) if (isReduceVectorizationEnabled) { convertReduceWork(reduceWork); } + } else if (baseWork instanceof MergeJoinWork){ + MergeJoinWork mergeJoinWork = (MergeJoinWork) baseWork; + + // MergeJoinExplainVectorization will say vectorization not supported. + mergeJoinWork.setVectorizationExamined(true); } } } else if (currTask instanceof SparkTask) { @@ -2928,7 +2992,7 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { HashTableImplementationType hashTableImplementationType = HashTableImplementationType.NONE; HashTableKind hashTableKind = HashTableKind.NONE; HashTableKeyType hashTableKeyType = HashTableKeyType.NONE; - VectorMapJoinVariation vectorMapJoinVariation = VectorMapJoinVariation.NONE; + VectorMapJoinVariation vectorMapJoinVariation = null; if (vectorDesc.getIsFastHashTableEnabled()) { hashTableImplementationType = HashTableImplementationType.FAST; @@ -2998,6 +3062,10 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { vectorMapJoinVariation = VectorMapJoinVariation.OUTER; hashTableKind = HashTableKind.HASH_MAP; break; + case JoinDesc.FULL_OUTER_JOIN: + vectorMapJoinVariation = VectorMapJoinVariation.FULL_OUTER; + hashTableKind = HashTableKind.HASH_MAP; + break; case JoinDesc.LEFT_SEMI_JOIN: vectorMapJoinVariation = VectorMapJoinVariation.LEFT_SEMI; hashTableKind = HashTableKind.HASH_SET; @@ -3027,6 +3095,13 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { case OUTER: opClass = VectorMapJoinOuterLongOperator.class; break; + case FULL_OUTER: + if (desc.isFullOuterIntersect()) { + opClass = VectorMapJoinFullOuterIntersectLongOperator.class; + } else { + opClass = VectorMapJoinFullOuterLongOperator.class; + } + break; default: throw new HiveException("Unknown operator variation " + vectorMapJoinVariation); } @@ -3045,6 +3120,13 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { case OUTER: opClass = VectorMapJoinOuterStringOperator.class; break; + case FULL_OUTER: + if (desc.isFullOuterIntersect()) { + opClass = VectorMapJoinFullOuterIntersectStringOperator.class; + } else { + opClass = VectorMapJoinFullOuterStringOperator.class; + } + break; default: throw new HiveException("Unknown operator variation " + vectorMapJoinVariation); } @@ -3063,6 +3145,13 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { case OUTER: opClass = VectorMapJoinOuterMultiKeyOperator.class; break; + case FULL_OUTER: + if (desc.isFullOuterIntersect()) { + opClass = VectorMapJoinFullOuterIntersectMultiKeyOperator.class; + } else { + opClass = VectorMapJoinFullOuterMultiKeyOperator.class; + } + break; default: throw new HiveException("Unknown operator variation " + vectorMapJoinVariation); } @@ 
-3078,6 +3167,11 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { vectorDesc.setHashTableKind(hashTableKind); vectorDesc.setHashTableKeyType(hashTableKeyType); vectorDesc.setVectorMapJoinVariation(vectorMapJoinVariation); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { + + // UNDONE: Not needed for SHARED-MEMORY Non-INTERSECT. + vectorDesc.setIsSaveNullKeyValuesForFullOuter(true); + } vectorDesc.setMinMaxEnabled(minMaxEnabled); vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo); @@ -3190,6 +3284,8 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi /* * Similarly, we need a mapping since a value expression can be a calculation and the value * will go into a scratch column. + * + * Value expressions include keys? YES. */ int[] bigTableValueColumnMap = new int[allBigTableValueExpressions.length]; String[] bigTableValueColumnNames = new String[allBigTableValueExpressions.length]; @@ -3229,18 +3325,24 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi vectorDesc.setAllBigTableValueExpressions(allBigTableValueExpressions); /* - * Small table information. + * Column mapping. */ - VectorColumnOutputMapping bigTableRetainedMapping = - new VectorColumnOutputMapping("Big Table Retained Mapping"); + VectorColumnOutputMapping bigTableRetainMapping = + new VectorColumnOutputMapping("Big Table Retain Mapping"); + + VectorColumnOutputMapping nonOuterSmallTableKeyMapping = + new VectorColumnOutputMapping("Non Outer Small Table Key Key Mapping"); + + VectorColumnOutputMapping outerSmallTableKeyMapping = + new VectorColumnOutputMapping("Outer Small Table Key Mapping"); - VectorColumnOutputMapping bigTableOuterKeyMapping = - new VectorColumnOutputMapping("Big Table Outer Key Mapping"); + VectorColumnSourceMapping fullOuterSmallTableKeyMapping = + new VectorColumnSourceMapping("Full Outer Small Table Key Mapping"); // The order of the fields in the LazyBinary small table value must be used, so // we use the source ordering flavor for the mapping. - VectorColumnSourceMapping smallTableMapping = - new VectorColumnSourceMapping("Small Table Mapping"); + VectorColumnSourceMapping smallTableValueMapping = + new VectorColumnSourceMapping("Small Table Value Mapping"); Byte[] order = desc.getTagOrder(); Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]); @@ -3250,7 +3352,6 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi * Gather up big and small table output result information from the MapJoinDesc. */ List bigTableRetainList = desc.getRetainList().get(posBigTable); - int bigTableRetainSize = bigTableRetainList.size(); int[] smallTableIndices; int smallTableIndicesSize; @@ -3287,6 +3388,8 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi VectorColumnSourceMapping projectionMapping = new VectorColumnSourceMapping("Projection Mapping"); int nextOutputColumn = (order[0] == posBigTable ? 0 : smallTableResultSize); + + final int bigTableRetainSize = bigTableRetainList.size(); for (int i = 0; i < bigTableRetainSize; i++) { // Since bigTableValueExpressions may do a calculation and produce a scratch column, we @@ -3300,9 +3403,10 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi projectionMapping.add(nextOutputColumn, batchColumnIndex, typeInfo); // Collect columns we copy from the big table batch to the overflow batch. 
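As a side note on the mapping objects introduced above: they are populated while the join output is planned and then frozen into plain arrays. A small sketch of that lifecycle, assuming the surrounding method's locals (batchColumnIndex, typeInfo) are in scope; the variable name retain is illustrative:

    VectorColumnOutputMapping retain =
        new VectorColumnOutputMapping("Big Table Retain Mapping");
    if (!retain.containsOutputColumn(batchColumnIndex)) {
      // add(input column, output column, type)
      retain.add(batchColumnIndex, batchColumnIndex, typeInfo);
    }
    retain.finalize();                              // convert dynamic lists to arrays
    int[] outputColumns = retain.getOutputColumns();
    TypeInfo[] outputTypes = retain.getTypeInfos();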
- if (!bigTableRetainedMapping.containsOutputColumn(batchColumnIndex)) { + if (!bigTableRetainMapping.containsOutputColumn(batchColumnIndex)) { + + // Tolerate repeated use of a big table column. - bigTableRetainedMapping.add(batchColumnIndex, batchColumnIndex, typeInfo); + bigTableRetainMapping.add(batchColumnIndex, batchColumnIndex, typeInfo); } nextOutputColumn++; @@ -3319,10 +3423,8 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi nextOutputColumn = firstSmallTableOutputColumn; // Small table indices has more information (i.e. keys) than retain, so use it if it exists... - String[] bigTableRetainedNames; if (smallTableIndicesSize > 0) { smallTableOutputCount = smallTableIndicesSize; - bigTableRetainedNames = new String[smallTableOutputCount]; for (int i = 0; i < smallTableIndicesSize; i++) { if (smallTableIndices[i] >= 0) { @@ -3334,34 +3436,39 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi // Since bigTableKeyExpressions may do a calculation and produce a scratch column, we // need to map the right column. - int batchKeyColumn = bigTableKeyColumnMap[keyIndex]; - bigTableRetainedNames[i] = bigTableKeyColumnNames[keyIndex]; + int bigTableKeyColumn = bigTableKeyColumnMap[keyIndex]; TypeInfo typeInfo = bigTableKeyTypeInfos[keyIndex]; if (!isOuterJoin) { // Optimize inner join keys of small table results. + // UNDONE: The columns seem backwards here... // Project the big table key into the small table result "area". - projectionMapping.add(nextOutputColumn, batchKeyColumn, typeInfo); + projectionMapping.add(nextOutputColumn, bigTableKeyColumn, typeInfo); + + if (!bigTableRetainMapping.containsOutputColumn(bigTableKeyColumn)) { - if (!bigTableRetainedMapping.containsOutputColumn(batchKeyColumn)) { - // If necessary, copy the big table key into the overflow batch's small table - // result "area". - bigTableRetainedMapping.add(batchKeyColumn, batchKeyColumn, typeInfo); + // When the Big Table key is not retained in the output result, we still need to copy it + // into the overflow batch so the projection of it (the Big Table key) onto + // the Small Table key will work properly. + // + nonOuterSmallTableKeyMapping.add(bigTableKeyColumn, bigTableKeyColumn, typeInfo); } } else { - // For outer joins, since the small table key can be null when there is no match, + // For outer joins, since the small table key can be null when there is a NOMATCH, + // we must have a physical (scratch) column for those keys. We cannot use the - // projection optimization used by inner joins above. + // projection optimization used by non-FULL OUTER joins above. int scratchColumn = vContext.allocateScratchColumn(typeInfo); projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo); - bigTableRetainedMapping.add(batchKeyColumn, scratchColumn, typeInfo); + outerSmallTableKeyMapping.add(bigTableKeyColumn, scratchColumn, typeInfo); - bigTableOuterKeyMapping.add(batchKeyColumn, scratchColumn, typeInfo); + // For FULL OUTER MapJoin, we need to be able to deserialize a Small Table key + // into the output result. + fullOuterSmallTableKeyMapping.add(keyIndex, scratchColumn, typeInfo); } } else { @@ -3375,21 +3482,18 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi smallTableExprVectorizes = false; } - bigTableRetainedNames[i] = smallTableExprNode.toString(); - TypeInfo typeInfo = smallTableExprNode.getTypeInfo(); // Make a new big table scratch column for the small table value.
int scratchColumn = vContext.allocateScratchColumn(typeInfo); projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo); - smallTableMapping.add(smallTableValueIndex, scratchColumn, typeInfo); + smallTableValueMapping.add(smallTableValueIndex, scratchColumn, typeInfo); } nextOutputColumn++; } } else if (smallTableRetainSize > 0) { smallTableOutputCount = smallTableRetainSize; - bigTableRetainedNames = new String[smallTableOutputCount]; // Only small table values appear in join output result. @@ -3402,21 +3506,24 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi smallTableExprVectorizes = false; } - bigTableRetainedNames[i] = smallTableExprNode.toString(); - // Make a new big table scratch column for the small table value. TypeInfo typeInfo = smallTableExprNode.getTypeInfo(); int scratchColumn = vContext.allocateScratchColumn(typeInfo); projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo); - smallTableMapping.add(smallTableValueIndex, scratchColumn, typeInfo); + smallTableValueMapping.add(smallTableValueIndex, scratchColumn, typeInfo); nextOutputColumn++; } - } else { - bigTableRetainedNames = new String[0]; } + Map> filterExpressions = desc.getFilters(); + VectorExpression[] bigTableFilterExpressions = + vContext.getVectorExpressions( + filterExpressions.get(posBigTable), + VectorExpressionDescriptor.Mode.FILTER); + vectorMapJoinInfo.setBigTableFilterExpressions(bigTableFilterExpressions); + boolean useOptimizedTable = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE); @@ -3472,15 +3579,23 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi // Convert dynamic arrays and maps to simple arrays. - bigTableRetainedMapping.finalize(); + bigTableRetainMapping.finalize(); + vectorMapJoinInfo.setBigTableRetainColumnMap(bigTableRetainMapping.getOutputColumns()); + vectorMapJoinInfo.setBigTableRetainTypeInfos(bigTableRetainMapping.getTypeInfos()); + + nonOuterSmallTableKeyMapping.finalize(); + vectorMapJoinInfo.setNonOuterSmallTableKeyColumnMap(nonOuterSmallTableKeyMapping.getOutputColumns()); + vectorMapJoinInfo.setNonOuterSmallTableKeyTypeInfos(nonOuterSmallTableKeyMapping.getTypeInfos()); + + outerSmallTableKeyMapping.finalize(); + fullOuterSmallTableKeyMapping.finalize(); - bigTableOuterKeyMapping.finalize(); + vectorMapJoinInfo.setOuterSmallTableKeyMapping(outerSmallTableKeyMapping); + vectorMapJoinInfo.setFullOuterSmallTableKeyMapping(fullOuterSmallTableKeyMapping); - smallTableMapping.finalize(); + smallTableValueMapping.finalize(); - vectorMapJoinInfo.setBigTableRetainedMapping(bigTableRetainedMapping); - vectorMapJoinInfo.setBigTableOuterKeyMapping(bigTableOuterKeyMapping); - vectorMapJoinInfo.setSmallTableMapping(smallTableMapping); + vectorMapJoinInfo.setSmallTableValueMapping(smallTableValueMapping); projectionMapping.finalize(); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/JoinCondDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/JoinCondDesc.java index ea22131..acbbba9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/JoinCondDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/JoinCondDesc.java @@ -125,7 +125,7 @@ public String getJoinCondString() { sb.append("Inner Join "); break; case JoinDesc.FULL_OUTER_JOIN: - sb.append("Outer Join "); + sb.append("Full Outer Join "); break; case JoinDesc.LEFT_OUTER_JOIN: sb.append("Left Outer Join "); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java index 91ea159..7187c7d 100644 --- 
ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; /** * Map Join operator Descriptor implementation. @@ -82,6 +83,7 @@ private boolean isHybridHashJoin; private boolean isDynamicPartitionHashJoin = false; + private boolean isFullOuterIntersect = false; public MapJoinDesc() { bigTableBucketNumMapping = new LinkedHashMap(); @@ -92,6 +94,7 @@ public MapJoinDesc(MapJoinDesc clone) { this.keys = clone.keys; this.keyTblDesc = clone.keyTblDesc; this.valueTblDescs = clone.valueTblDescs; + this.valueFilteredTblDescs = clone.valueFilteredTblDescs; this.posBigTable = clone.posBigTable; this.valueIndices = clone.valueIndices; this.retainList = clone.retainList; @@ -383,6 +386,7 @@ public boolean getGenJoinKeys() { return genJoinKeys; } + @Explain(displayName = "DynamicPartitionHashJoin", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, displayOnlyOnTrue = true) public boolean isDynamicPartitionHashJoin() { return isDynamicPartitionHashJoin; } @@ -391,6 +395,15 @@ public void setDynamicPartitionHashJoin(boolean isDistributedHashJoin) { this.isDynamicPartitionHashJoin = isDistributedHashJoin; } + @Explain(displayName = "fullOuterIntersect", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, displayOnlyOnTrue = true) + public boolean isFullOuterIntersect() { + return isFullOuterIntersect; + } + + public void setFullOuterIntersect(boolean isFullOuterIntersect) { + this.isFullOuterIntersect = isFullOuterIntersect; + } + // Use LinkedHashSet to give predictable display order. private static final Set vectorizableMapJoinNativeEngines = new LinkedHashSet(Arrays.asList("tez", "spark")); @@ -490,7 +503,7 @@ public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, return VectorizationCondition.getConditionsNotMet(nativeConditions); } - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableKeyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "bigTableKeyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getBigTableKeyExpressions() { return vectorExpressionsToStringList( isNative ? 
@@ -498,8 +511,16 @@ public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, vectorMapJoinDesc.getAllBigTableKeyExpressions()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableKeyColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getBigTableKeyColumnNums() { + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "hashTableImplementationType", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String hashTableImplementationType() { + if (!isNative) { + return null; + } + return vectorMapJoinDesc.getHashTableImplementationType().name(); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableKeyColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getBigTableKeyColumns() { if (!isNative) { return null; } @@ -507,7 +528,9 @@ public String getBigTableKeyColumnNums() { if (bigTableKeyColumnMap.length == 0) { return null; } - return Arrays.toString(bigTableKeyColumnMap); + return outputColumnsAndTypesToStringList( + vectorMapJoinInfo.getBigTableKeyColumnMap(), + vectorMapJoinInfo.getBigTableKeyTypeInfos()); } @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableValueExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) @@ -518,8 +541,16 @@ public String getBigTableKeyColumnNums() { vectorMapJoinDesc.getAllBigTableValueExpressions()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableValueColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getBigTableValueColumnNums() { + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableFilterExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getBigTableFilterExpressions() { + if (!isNative) { + return null; + } + return vectorExpressionsToStringList(vectorMapJoinInfo.getBigTableFilterExpressions()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableValueColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getBigTableValueColumns() { if (!isNative) { return null; } @@ -527,39 +558,59 @@ public String getBigTableValueColumnNums() { if (bigTableValueColumnMap.length == 0) { return null; } - return Arrays.toString(bigTableValueColumnMap); + return outputColumnsAndTypesToStringList( + vectorMapJoinInfo.getBigTableValueColumnMap(), + vectorMapJoinInfo.getBigTableValueTypeInfos()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "smallTableMapping", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getSmallTableColumns() { + @Explain(vectorization = Vectorization.DETAIL, displayName = "smallTableValueMapping", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getSmallTableColumns() { if (!isNative) { return null; } - return outputColumnsToStringList(vectorMapJoinInfo.getSmallTableMapping()); + return outputColumnsAndTypesToStringList(vectorMapJoinInfo.getSmallTableValueMapping()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "projectedOutputColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getProjectedOutputColumnNums() { + @Explain(vectorization = Vectorization.DETAIL, displayName = "projectedOutput", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getProjectedOutputColumnNums() { + if (!isNative) { + return null; + } + return outputColumnsAndTypesToStringList(vectorMapJoinInfo.getProjectionMapping()); + } + + @Explain(vectorization = 
Vectorization.DETAIL, displayName = "bigTableRetainColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getBigTableRetainedColumnNums() { if (!isNative) { return null; } - return outputColumnsToStringList(vectorMapJoinInfo.getProjectionMapping()); + return Arrays.toString(vectorMapJoinInfo.getBigTableRetainColumnMap()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableOuterKeyMapping", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List getBigTableOuterKey() { + @Explain(vectorization = Vectorization.DETAIL, displayName = "nonOuterSmallTableKeyMapping", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getNonOuterSmallTableKeyMapping() { + if (!isNative || + (vectorMapJoinDesc.getVectorMapJoinVariation() == VectorMapJoinVariation.OUTER || + vectorMapJoinDesc.getVectorMapJoinVariation() == VectorMapJoinVariation.FULL_OUTER)) { + return null; + } + return Arrays.toString(vectorMapJoinInfo.getNonOuterSmallTableKeyColumnMap()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "outerSmallTableKeyMapping", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getOuterSmallTableKeyMapping() { if (!isNative || vectorMapJoinDesc.getVectorMapJoinVariation() != VectorMapJoinVariation.OUTER) { return null; } - return columnMappingToStringList(vectorMapJoinInfo.getBigTableOuterKeyMapping()); + return columnMappingToStringList(vectorMapJoinInfo.getOuterSmallTableKeyMapping()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableRetainedColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getBigTableRetainedColumnNums() { - if (!isNative) { + @Explain(vectorization = Vectorization.DETAIL, displayName = "fullOuterSmallTableKeyMapping", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getFullOuterSmallTableKeyMapping() { + if (!isNative || vectorMapJoinDesc.getVectorMapJoinVariation() != VectorMapJoinVariation.FULL_OUTER) { return null; } - return outputColumnsToStringList(vectorMapJoinInfo.getBigTableRetainedMapping()); + return columnMappingToStringList(vectorMapJoinInfo.getFullOuterSmallTableKeyMapping()); } @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeNotSupportedKeyTypes", explainLevels = { Level.DEFAULT, Level.EXTENDED }) diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java index b0ae64a..0682529 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java @@ -25,11 +25,15 @@ import java.util.Map.Entry; import java.util.Set; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator; import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.plan.BaseWork.BaseExplainVectorization; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; +import org.apache.hadoop.hive.ql.plan.ReduceWork.ReduceExplainVectorization; import org.apache.hadoop.mapred.JobConf; public class MergeJoinWork extends BaseWork { @@ -179,4 +183,51 @@ public boolean getLlapMode() { public void addDummyOp(HashTableDummyOperator dummyOp) { getMainWork().addDummyOp(dummyOp); } + public class MergeJoinExplainVectorization extends 
BaseExplainVectorization { + + private final MergeJoinWork mergeJoinWork; + + private VectorizationCondition[] mergeWorkVectorizationConditions; + + public MergeJoinExplainVectorization(MergeJoinWork mergeJoinWork) { + super(mergeJoinWork); + this.mergeJoinWork = mergeJoinWork; + } + + private VectorizationCondition[] createMergeWorkExplainVectorizationConditions() { + + boolean enabled = false; + + VectorizationCondition[] conditions = new VectorizationCondition[] { + new VectorizationCondition( + enabled, + "Vectorizing MergeJoin Supported") + }; + return conditions; + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enableConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getEnableConditionsMet() { + if (mergeWorkVectorizationConditions == null) { + mergeWorkVectorizationConditions = createMergeWorkExplainVectorizationConditions(); + } + return VectorizationCondition.getConditionsMet(mergeWorkVectorizationConditions); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enableConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getEnableConditionsNotMet() { + if (mergeWorkVectorizationConditions == null) { + mergeWorkVectorizationConditions = createMergeWorkExplainVectorizationConditions(); + } + return VectorizationCondition.getConditionsNotMet(mergeWorkVectorizationConditions); + } + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "MergeJoin Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public MergeJoinExplainVectorization getReduceExplainVectorization() { + if (!getVectorizationExamined()) { + return null; + } + return new MergeJoinExplainVectorization(this); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java index 446b810..5439e14 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java @@ -59,6 +59,25 @@ public String outputColumnsToStringList(VectorColumnMapping vectorColumnMapping) return Arrays.toString(outputColumns); } + public List outputColumnsAndTypesToStringList(int[] outputColumns, TypeInfo[] typeInfos) { + final int size = outputColumns.length; + ArrayList result = new ArrayList(size); + for (int i = 0; i < size; i++) { + result.add(outputColumns[i] + ":" + typeInfos[i].toString()); + } + return result; + } + + public List outputColumnsAndTypesToStringList(VectorColumnMapping vectorColumnMapping) { + final int size = vectorColumnMapping.getCount(); + if (size == 0) { + return null; + } + int[] outputColumns = vectorColumnMapping.getOutputColumns(); + TypeInfo[] typeInfos = vectorColumnMapping.getTypeInfos(); + return outputColumnsAndTypesToStringList(outputColumns, typeInfos); + } + public List columnMappingToStringList(VectorColumnMapping vectorColumnMapping) { final int size = vectorColumnMapping.getCount(); if (size == 0) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java index f2955af..3b3d4c0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.optimizer.signature.Signature; +import org.apache.hadoop.hive.ql.exec.Utilities; import 
org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc.ReduceSinkKeyType; @@ -188,6 +189,15 @@ public Object clone() { return outputKeyColumnNames; } + @Explain(displayName = "output key column names", explainLevels = { Level.EXTENDED }) + public List getOutputKeyColumnNamesDisplay() { + List result = new ArrayList(); + for (String name : outputKeyColumnNames) { + result.add(Utilities.ReduceField.KEY.name() + "." + name); + } + return result; + } + public void setOutputKeyColumnNames( java.util.ArrayList outputKeyColumnNames) { this.outputKeyColumnNames = outputKeyColumnNames; @@ -197,6 +207,15 @@ public void setOutputKeyColumnNames( return outputValueColumnNames; } + @Explain(displayName = "output value column names", explainLevels = { Level.EXTENDED }) + public List getOutputValueColumnNamesDisplay() { + List result = new ArrayList(); + for (String name : outputValueColumnNames) { + result.add(Utilities.ReduceField.VALUE.name() + "." + name); + } + return result; + } + public void setOutputValueColumnNames( java.util.ArrayList outputValueColumnNames) { this.outputValueColumnNames = outputValueColumnNames; @@ -536,34 +555,38 @@ public ReduceSinkOperatorExplainVectorization(ReduceSinkDesc reduceSinkDesc, return vectorExpressionsToStringList(vectorReduceSinkInfo.getReduceSinkValueExpressions()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "keyColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getKeyColumnNums() { + @Explain(vectorization = Vectorization.DETAIL, displayName = "keyColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getKeyColumns() { if (!isNative) { return null; } int[] keyColumnMap = vectorReduceSinkInfo.getReduceSinkKeyColumnMap(); if (keyColumnMap == null) { // Always show an array. - keyColumnMap = new int[0]; + return new ArrayList(); } - return Arrays.toString(keyColumnMap); + return outputColumnsAndTypesToStringList( + vectorReduceSinkInfo.getReduceSinkKeyColumnMap(), + vectorReduceSinkInfo.getReduceSinkKeyTypeInfos()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "valueColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getValueColumnNums() { + @Explain(vectorization = Vectorization.DETAIL, displayName = "valueColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getValueColumns() { if (!isNative) { return null; } int[] valueColumnMap = vectorReduceSinkInfo.getReduceSinkValueColumnMap(); if (valueColumnMap == null) { // Always show an array. - valueColumnMap = new int[0]; + return new ArrayList(); } - return Arrays.toString(valueColumnMap); + return outputColumnsAndTypesToStringList( + vectorReduceSinkInfo.getReduceSinkValueColumnMap(), + vectorReduceSinkInfo.getReduceSinkValueTypeInfos()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "bucketColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getBucketColumnNums() { + @Explain(vectorization = Vectorization.DETAIL, displayName = "bucketColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getBucketColumns() { if (!isNative) { return null; } @@ -572,11 +595,13 @@ public String getBucketColumnNums() { // Suppress empty column map. 
return null; } - return Arrays.toString(bucketColumnMap); + return outputColumnsAndTypesToStringList( + vectorReduceSinkInfo.getReduceSinkBucketColumnMap(), + vectorReduceSinkInfo.getReduceSinkBucketTypeInfos()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "partitionColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getPartitionColumnNums() { + @Explain(vectorization = Vectorization.DETAIL, displayName = "partitionColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getPartitionColumns() { if (!isNative) { return null; } @@ -585,7 +610,9 @@ public String getPartitionColumnNums() { // Suppress empty column map. return null; } - return Arrays.toString(partitionColumnMap); + return outputColumnsAndTypesToStringList( + vectorReduceSinkInfo.getReduceSinkPartitionColumnMap(), + vectorReduceSinkInfo.getReduceSinkPartitionTypeInfos()); } private VectorizationCondition[] createNativeConditions() { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java index 58032ca..a8f045c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java @@ -85,11 +85,11 @@ public PrimitiveTypeInfo getPrimitiveTypeInfo() { } public static enum VectorMapJoinVariation { - NONE, - INNER_BIG_ONLY, INNER, + INNER_BIG_ONLY, LEFT_SEMI, - OUTER + OUTER, + FULL_OUTER } private HashTableImplementationType hashTableImplementationType; @@ -107,7 +107,7 @@ public VectorMapJoinDesc() { hashTableImplementationType = HashTableImplementationType.NONE; hashTableKind = HashTableKind.NONE; hashTableKeyType = HashTableKeyType.NONE; - vectorMapJoinVariation = VectorMapJoinVariation.NONE; + vectorMapJoinVariation = null; minMaxEnabled = false; allBigTableKeyExpressions = null; @@ -206,6 +206,7 @@ public VectorMapJoinInfo getVectorMapJoinInfo() { private List notSupportedKeyTypes; private boolean smallTableExprVectorizes; private boolean outerJoinHasNoKeys; + boolean isSaveNullKeyValuesForFullOuter; public void setUseOptimizedTable(boolean useOptimizedTable) { this.useOptimizedTable = useOptimizedTable; @@ -274,5 +275,10 @@ public void setIsHybridHashJoin(boolean isHybridHashJoin) { public boolean getIsHybridHashJoin() { return isHybridHashJoin; } - + public void setIsSaveNullKeyValuesForFullOuter(boolean isSaveNullKeyValuesForFullOuter) { + this.isSaveNullKeyValuesForFullOuter = isSaveNullKeyValuesForFullOuter; + } + public boolean getIsSaveNullKeyValuesForFullOuter() { + return isSaveNullKeyValuesForFullOuter; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java index 6db0540..ad82e5c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java @@ -48,9 +48,19 @@ private TypeInfo[] bigTableValueTypeInfos; private VectorExpression[] slimmedBigTableValueExpressions; - private VectorColumnOutputMapping bigTableRetainedMapping; - private VectorColumnOutputMapping bigTableOuterKeyMapping; - private VectorColumnSourceMapping smallTableMapping; + private VectorExpression[] bigTableFilterExpressions; + + private int[] bigTableRetainColumnMap; + private TypeInfo[] bigTableRetainTypeInfos; + + private int[] nonOuterSmallTableKeyColumnMap; + private TypeInfo[] nonOuterSmallTableKeyTypeInfos; + + private VectorColumnOutputMapping 
outerSmallTableKeyMapping; + + private VectorColumnSourceMapping fullOuterSmallTableKeyMapping; + + private VectorColumnSourceMapping smallTableValueMapping; private VectorColumnSourceMapping projectionMapping; @@ -65,9 +75,19 @@ public VectorMapJoinInfo() { bigTableValueTypeInfos = null; slimmedBigTableValueExpressions = null; - bigTableRetainedMapping = null; - bigTableOuterKeyMapping = null; - smallTableMapping = null; + bigTableFilterExpressions = null; + + bigTableRetainColumnMap = null; + bigTableRetainTypeInfos = null; + + nonOuterSmallTableKeyColumnMap = null; + nonOuterSmallTableKeyTypeInfos = null; + + outerSmallTableKeyMapping = null; + + fullOuterSmallTableKeyMapping = null; + + smallTableValueMapping = null; projectionMapping = null; } @@ -138,28 +158,69 @@ public void setSlimmedBigTableValueExpressions( this.slimmedBigTableValueExpressions = slimmedBigTableValueExpressions; } - public void setBigTableRetainedMapping(VectorColumnOutputMapping bigTableRetainedMapping) { - this.bigTableRetainedMapping = bigTableRetainedMapping; + public VectorExpression[] getBigTableFilterExpressions() { + return bigTableFilterExpressions; + } + + public void setBigTableFilterExpressions(VectorExpression[] bigTableFilterExpressions) { + this.bigTableFilterExpressions = bigTableFilterExpressions; + } + + public void setBigTableRetainColumnMap(int[] bigTableRetainColumnMap) { + this.bigTableRetainColumnMap = bigTableRetainColumnMap; + } + + public int[] getBigTableRetainColumnMap() { + return bigTableRetainColumnMap; + } + + public void setBigTableRetainTypeInfos(TypeInfo[] bigTableRetainTypeInfos) { + this.bigTableRetainTypeInfos = bigTableRetainTypeInfos; + } + + public TypeInfo[] getBigTableRetainTypeInfos() { + return bigTableRetainTypeInfos; + } + + public void setNonOuterSmallTableKeyColumnMap(int[] nonOuterSmallTableKeyColumnMap) { + this.nonOuterSmallTableKeyColumnMap = nonOuterSmallTableKeyColumnMap; + } + + public int[] getNonOuterSmallTableKeyColumnMap() { + return nonOuterSmallTableKeyColumnMap; + } + + public void setNonOuterSmallTableKeyTypeInfos(TypeInfo[] nonOuterSmallTableKeyTypeInfos) { + this.nonOuterSmallTableKeyTypeInfos = nonOuterSmallTableKeyTypeInfos; + } + + public TypeInfo[] getNonOuterSmallTableKeyTypeInfos() { + return nonOuterSmallTableKeyTypeInfos; + } + + public void setOuterSmallTableKeyMapping(VectorColumnOutputMapping outerSmallTableKeyMapping) { + this.outerSmallTableKeyMapping = outerSmallTableKeyMapping; } - public VectorColumnOutputMapping getBigTableRetainedMapping() { - return bigTableRetainedMapping; + public VectorColumnOutputMapping getOuterSmallTableKeyMapping() { + return outerSmallTableKeyMapping; } - public void setBigTableOuterKeyMapping(VectorColumnOutputMapping bigTableOuterKeyMapping) { - this.bigTableOuterKeyMapping = bigTableOuterKeyMapping; + public void setFullOuterSmallTableKeyMapping( + VectorColumnSourceMapping fullOuterSmallTableKeyMapping) { + this.fullOuterSmallTableKeyMapping = fullOuterSmallTableKeyMapping; } - public VectorColumnOutputMapping getBigTableOuterKeyMapping() { - return bigTableOuterKeyMapping; + public VectorColumnSourceMapping getFullOuterSmallTableKeyMapping() { + return fullOuterSmallTableKeyMapping; } - public void setSmallTableMapping(VectorColumnSourceMapping smallTableMapping) { - this.smallTableMapping = smallTableMapping; + public void setSmallTableValueMapping(VectorColumnSourceMapping smallTableValueMapping) { + this.smallTableValueMapping = smallTableValueMapping; } - public VectorColumnSourceMapping 
getSmallTableMapping() { - return smallTableMapping; + public VectorColumnSourceMapping getSmallTableValueMapping() { + return smallTableValueMapping; } public void setProjectionMapping(VectorColumnSourceMapping projectionMapping) { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java index 9f785e6..e5c749f 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java @@ -84,9 +84,9 @@ public void testGetNonExistent() throws Exception { map.put(kv2, -1); key[0] = (byte)(key[0] + 1); BytesBytesMultiHashMap.Result hashMapResult = new BytesBytesMultiHashMap.Result(); - map.getValueResult(key, 0, key.length, hashMapResult); + map.getValueResult(key, 0, key.length, hashMapResult, null); assertTrue(!hashMapResult.hasRows()); - map.getValueResult(key, 0, 0, hashMapResult); + map.getValueResult(key, 0, 0, hashMapResult, null); assertTrue(!hashMapResult.hasRows()); } @@ -104,7 +104,7 @@ public void testPutWithFullMap() throws Exception { assertEquals(CAPACITY, map.getCapacity()); // Get of non-existent key should terminate.. BytesBytesMultiHashMap.Result hashMapResult = new BytesBytesMultiHashMap.Result(); - map.getValueResult(new byte[0], 0, 0, hashMapResult); + map.getValueResult(new byte[0], 0, 0, hashMapResult, null); } @Test @@ -123,7 +123,7 @@ public void testExpand() throws Exception { private void verifyHashMapResult(BytesBytesMultiHashMap map, byte[] key, byte[]... values) { BytesBytesMultiHashMap.Result hashMapResult = new BytesBytesMultiHashMap.Result(); - byte state = map.getValueResult(key, 0, key.length, hashMapResult); + byte state = map.getValueResult(key, 0, key.length, hashMapResult, null); HashSet hs = new HashSet(); int count = 0; if (hashMapResult.hasRows()) { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/CollectorTestOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/CollectorTestOperator.java index 6491d79..244208b 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/CollectorTestOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/CollectorTestOperator.java @@ -26,8 +26,22 @@ private static final long serialVersionUID = 1L; + private boolean isClosed; + private boolean isAborted; + public CollectorTestOperator() { super(); + + isClosed = false; + isAborted = false; + } + + public boolean getIsClosed() { + return isClosed; + } + + public boolean getIsAborted() { + return isAborted; } @Override @@ -36,6 +50,14 @@ public void process(Object row, int tag) throws HiveException { } @Override + public void closeOp(boolean abort) { + isClosed = true; + if (abort) { + isAborted = true; + } + } + + @Override public String getName() { return CollectorTestOperator.class.getSimpleName(); } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowCollectorTestOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowCollectorTestOperator.java index 18933d4..d1aaef6 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowCollectorTestOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowCollectorTestOperator.java @@ -28,19 +28,22 @@ private static final long serialVersionUID = 1L; private final ObjectInspector[] 
outputObjectInspectors; + private final int columnSize; public RowCollectorTestOperator(ObjectInspector[] outputObjectInspectors) { super(); this.outputObjectInspectors = outputObjectInspectors; + columnSize = outputObjectInspectors.length; } @Override public void process(Object row, int tag) throws HiveException { rowCount++; Object[] rowObjectArray = (Object[]) row; - Object[] resultObjectArray = new Object[rowObjectArray.length]; - for (int c = 0; c < rowObjectArray.length; c++) { - resultObjectArray[c] = ((PrimitiveObjectInspector) outputObjectInspectors[c]).copyObject(rowObjectArray[c]); + Object[] resultObjectArray = new Object[columnSize]; + for (int c = 0; c < columnSize; c++) { + resultObjectArray[c] = + ((PrimitiveObjectInspector) outputObjectInspectors[c]).copyObject(rowObjectArray[c]); } nextTestRow(new RowTestObjects(resultObjectArray)); } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowVectorCollectorTestOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowVectorCollectorTestOperator.java index 06cd1e9..a2f9f04 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowVectorCollectorTestOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowVectorCollectorTestOperator.java @@ -39,6 +39,16 @@ public RowVectorCollectorTestOperator(TypeInfo[] outputTypeInfos, vectorExtractRow.init(outputTypeInfos); } + public RowVectorCollectorTestOperator( + int[] outputProjectionColumnNums, + TypeInfo[] outputTypeInfos, + ObjectInspector[] outputObjectInspectors) throws HiveException { + super(); + this.outputObjectInspectors = outputObjectInspectors; + vectorExtractRow = new VectorExtractRow(); + vectorExtractRow.init(outputTypeInfos, outputProjectionColumnNums); + } + @Override public void process(Object row, int tag) throws HiveException { VectorizedRowBatch batch = (VectorizedRowBatch) row; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/util/rowobjects/RowTestObjectsMultiSet.java ql/src/test/org/apache/hadoop/hive/ql/exec/util/rowobjects/RowTestObjectsMultiSet.java index 51a5f8e..547530e 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/util/rowobjects/RowTestObjectsMultiSet.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/util/rowobjects/RowTestObjectsMultiSet.java @@ -26,55 +26,129 @@ import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects; public class RowTestObjectsMultiSet { - private SortedMap sortedMap; - private int rowCount; - private int totalCount; + + public static enum RowFlag { + NONE (0), + REGULAR (0x01), + LEFT_OUTER (0x02), + FULL_OUTER (0x04); + + public final long value; + RowFlag(long value) { + this.value = value; + } + } + + private static class Value { + + // Mutable. 
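The RowFlag constants above are declared as distinct bits so that a row's provenance can be accumulated into the long rowFlags field of the Value class that follows (value.rowFlags |= rowFlag.value) and rendered by displayRowFlags further down. A standalone sketch of that bitmask convention, in plain Java and independent of the test classes:

public class RowFlagSketch {

  enum RowFlag {
    NONE(0), REGULAR(0x01), LEFT_OUTER(0x02), FULL_OUTER(0x04);

    final long value;
    RowFlag(long value) { this.value = value; }
  }

  public static void main(String[] args) {
    // A row observed both as a regular match and as a FULL OUTER-only key.
    long rowFlags = 0;
    rowFlags |= RowFlag.REGULAR.value;
    rowFlags |= RowFlag.FULL_OUTER.value;

    // Render the set bits the same way displayRowFlags does.
    StringBuilder sb = new StringBuilder("{");
    for (RowFlag flag : RowFlag.values()) {
      if ((rowFlags & flag.value) != 0) {
        if (sb.length() > 1) {
          sb.append(", ");
        }
        sb.append(flag.name());
      }
    }
    sb.append("}");
    System.out.println(sb);  // prints {REGULAR, FULL_OUTER}
  }
}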
+ public int count; + public long rowFlags; + + public final int initialKeyCount; + public final int initialValueCount; + public final RowFlag initialRowFlag; + + public Value(int count, RowFlag rowFlag, int totalKeyCount, int totalValueCount) { + this.count = count; + this.rowFlags = rowFlag.value; + + initialKeyCount = totalKeyCount; + initialValueCount = totalValueCount; + initialRowFlag = rowFlag; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("count "); + sb.append(count); + return sb.toString(); + } + } + + private SortedMap sortedMap; + private int totalKeyCount; + private int totalValueCount; public RowTestObjectsMultiSet() { - sortedMap = new TreeMap(); - rowCount = 0; - totalCount = 0; + sortedMap = new TreeMap(); + totalKeyCount = 0; + totalValueCount = 0; } - public int getRowCount() { - return rowCount; + public int getTotalKeyCount() { + return totalKeyCount; } - public int getTotalCount() { - return totalCount; + public int getTotalValueCount() { + return totalValueCount; } - public void add(RowTestObjects testRow) { + public void add(RowTestObjects testRow, RowFlag rowFlag) { if (sortedMap.containsKey(testRow)) { - Integer count = sortedMap.get(testRow); - count++; + Value value = sortedMap.get(testRow); + value.count++; + value.rowFlags |= rowFlag.value; + totalValueCount++; } else { - sortedMap.put(testRow, 1); - rowCount++; + sortedMap.put(testRow, new Value(1, rowFlag, ++totalKeyCount, ++totalValueCount)); } - totalCount++; + } - public boolean verify(RowTestObjectsMultiSet other) { + public void add(RowTestObjects testRow, int count) { + if (sortedMap.containsKey(testRow)) { + throw new RuntimeException(); + } + sortedMap.put(testRow, new Value(count, RowFlag.NONE, ++totalKeyCount, ++totalValueCount)); + } + + public String displayRowFlags(long rowFlags) { + StringBuilder sb = new StringBuilder(); + sb.append("{"); + for (RowFlag rowFlag : RowFlag.values()) { + if ((rowFlags & rowFlag.value) != 0) { + if (sb.length() > 1) { + sb.append(", "); + } + sb.append(rowFlag.name()); + } + } + sb.append("}"); + return sb.toString(); + } + + public boolean verify(RowTestObjectsMultiSet other, String left, String right) { final int thisSize = this.sortedMap.size(); final int otherSize = other.sortedMap.size(); if (thisSize != otherSize) { - System.out.println("*VERIFY* count " + thisSize + " doesn't match otherSize " + otherSize); + System.out.println("*BENCHMARK* " + left + " count " + thisSize + " doesn't match " + right + " " + otherSize); return false; } - Iterator> thisIterator = this.sortedMap.entrySet().iterator(); - Iterator> otherIterator = other.sortedMap.entrySet().iterator(); + Iterator> thisIterator = this.sortedMap.entrySet().iterator(); + Iterator> otherIterator = other.sortedMap.entrySet().iterator(); for (int i = 0; i < thisSize; i++) { - Entry thisEntry = thisIterator.next(); - Entry otherEntry = otherIterator.next(); + Entry thisEntry = thisIterator.next(); + Entry otherEntry = otherIterator.next(); if (!thisEntry.getKey().equals(otherEntry.getKey())) { - System.out.println("*VERIFY* thisEntry.getKey() " + thisEntry.getKey() + " doesn't match otherEntry.getKey() " + otherEntry.getKey()); + System.out.println("*BENCHMARK* " + left + " row " + thisEntry.getKey().toString() + + " (rowFlags " + displayRowFlags(thisEntry.getValue().rowFlags) + + " count " + thisEntry.getValue().count + ")" + + " but found " + right + " row " + otherEntry.getKey().toString() + + " (initialKeyCount " + + 
otherEntry.getValue().initialKeyCount + + " initialValueCount " + otherEntry.getValue().initialValueCount + ")"); return false; } // Check multi-set count. - if (!thisEntry.getValue().equals(otherEntry.getValue())) { - System.out.println("*VERIFY* key " + thisEntry.getKey() + " count " + thisEntry.getValue() + " doesn't match " + otherEntry.getValue()); + if (thisEntry.getValue().count != otherEntry.getValue().count) { + System.out.println("*BENCHMARK* " + left + " row " + thisEntry.getKey().toString() + + " count " + thisEntry.getValue().count + + " (rowFlags " + displayRowFlags(thisEntry.getValue().rowFlags) + ")" + + " doesn't match " + right + " row count " + otherEntry.getValue().count + + " (initialKeyCount " + + otherEntry.getValue().initialKeyCount + + " initialValueCount " + otherEntry.getValue().initialValueCount + ")"); return false; } } @@ -84,6 +158,51 @@ public boolean verify(RowTestObjectsMultiSet other) { return true; } + public RowTestObjectsMultiSet subtract(RowTestObjectsMultiSet other) { + RowTestObjectsMultiSet result = new RowTestObjectsMultiSet(); + + Iterator> thisIterator = this.sortedMap.entrySet().iterator(); + while (thisIterator.hasNext()) { + Entry thisEntry = thisIterator.next(); + + if (other.sortedMap.containsKey(thisEntry.getKey())) { + Value thisValue = thisEntry.getValue(); + Value otherValue = other.sortedMap.get(thisEntry.getKey()); + if (thisValue.count == otherValue.count) { + continue; + } + } + result.add(thisEntry.getKey(), thisEntry.getValue().count); + } + + return result; + } + + public void displayDifferences(RowTestObjectsMultiSet other, String left, String right) { + + RowTestObjectsMultiSet leftOnly = this.subtract(other); + Iterator> leftOnlyIterator = + leftOnly.sortedMap.entrySet().iterator(); + while (leftOnlyIterator.hasNext()) { + Entry leftOnlyEntry = leftOnlyIterator.next(); + System.out.println( + "*BENCHMARK* " + left + " only row " + leftOnlyEntry.getKey().toString() + + " count " + leftOnlyEntry.getValue().count + + " (initialRowFlag " + leftOnlyEntry.getValue().initialRowFlag.name() + ")"); + } + + RowTestObjectsMultiSet rightOnly = other.subtract(this); + Iterator> rightOnlyIterator = + rightOnly.sortedMap.entrySet().iterator(); + while (rightOnlyIterator.hasNext()) { + Entry rightOnlyEntry = rightOnlyIterator.next(); + System.out.println( + "*BENCHMARK* " + right + " only row " + rightOnlyEntry.getKey().toString() + + " count " + rightOnlyEntry.getValue().count + + " (initialRowFlag " + rightOnlyEntry.getValue().initialRowFlag.name() + ")"); + } + } + @Override public String toString() { return sortedMap.toString(); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java index 2d0c783..6ce63a4 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java @@ -62,9 +62,8 @@ public ValidatorVectorSelectOperator(CompilationOpContext ctx, OperatorDesc conf * Override forward to do validation */ @Override - public void forward(Object row, ObjectInspector rowInspector, boolean isVectorized) + public void vectorForward(VectorizedRowBatch vrg) throws HiveException { - VectorizedRowBatch vrg = (VectorizedRowBatch) row; int[] projections = vrg.projectedColumns; assertEquals(2, vrg.projectionSize); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java 
ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java index 0514e3f..2bd270d 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java @@ -26,10 +26,14 @@ import java.util.Map; import java.util.Map.Entry; +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; @@ -37,31 +41,42 @@ import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator; +import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowCollectorTestOperatorBase; +import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowVectorCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects; +import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet; import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; +import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinBaseOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.JoinCondDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PlanUtils; +import org.apache.hadoop.hive.ql.plan.SelectDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; import 
org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; +import org.apache.hadoop.hive.ql.plan.VectorSelectDesc; import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; @@ -90,45 +105,209 @@ NATIVE_VECTOR_FAST } + public static boolean isVectorOutput(MapJoinTestImplementation mapJoinImplementation) { + return + (mapJoinImplementation != MapJoinTestImplementation.ROW_MODE_HASH_MAP && + mapJoinImplementation != MapJoinTestImplementation.ROW_MODE_OPTIMIZED); + } + + /* + * This test collector operator is for MapJoin row-mode. + */ + public static class TestMultiSetCollectorOperator extends RowCollectorTestOperator { + + private final RowTestObjectsMultiSet testRowMultiSet; + + public TestMultiSetCollectorOperator( + ObjectInspector[] outputObjectInspectors, + RowTestObjectsMultiSet testRowMultiSet) { + super(outputObjectInspectors); + this.testRowMultiSet = testRowMultiSet; + } + + public RowTestObjectsMultiSet getTestRowMultiSet() { + return testRowMultiSet; + } + + public void nextTestRow(RowTestObjects testRow) { + testRowMultiSet.add(testRow, RowTestObjectsMultiSet.RowFlag.NONE); + } + + @Override + public String getName() { + return TestMultiSetCollectorOperator.class.getSimpleName(); + } + } + + public static class TestMultiSetVectorCollectorOperator extends RowVectorCollectorTestOperator { + + private final RowTestObjectsMultiSet testRowMultiSet; + + public RowTestObjectsMultiSet getTestRowMultiSet() { + return testRowMultiSet; + } + + public TestMultiSetVectorCollectorOperator(TypeInfo[] outputTypeInfos, + ObjectInspector[] outputObjectInspectors, RowTestObjectsMultiSet testRowMultiSet) + throws HiveException { + super(outputTypeInfos, outputObjectInspectors); + this.testRowMultiSet = testRowMultiSet; + } + + public TestMultiSetVectorCollectorOperator( + int[] outputProjectionColumnNums, + TypeInfo[] outputTypeInfos, + ObjectInspector[] outputObjectInspectors, + RowTestObjectsMultiSet testRowMultiSet) throws HiveException { + super(outputProjectionColumnNums, outputTypeInfos, outputObjectInspectors); + this.testRowMultiSet = testRowMultiSet; + } + + public void nextTestRow(RowTestObjects testRow) { + testRowMultiSet.add(testRow, RowTestObjectsMultiSet.RowFlag.NONE); + } + + @Override + public String getName() { + return TestMultiSetVectorCollectorOperator.class.getSimpleName(); + } + } + public static MapJoinDesc createMapJoinDesc(MapJoinTestDescription testDesc) { + return createMapJoinDesc(testDesc, false); + } + + public static MapJoinDesc createMapJoinDesc(MapJoinTestDescription testDesc, + boolean isFullOuterIntersect) { MapJoinDesc mapJoinDesc = new MapJoinDesc(); + mapJoinDesc.setPosBigTable(0); - List keyExpr = new ArrayList(); + + List bigTableKeyExpr = new ArrayList(); for (int i = 0; i < testDesc.bigTableKeyColumnNums.length; i++) { - keyExpr.add(new ExprNodeColumnDesc(testDesc.bigTableKeyTypeInfos[i], testDesc.bigTableKeyColumnNames[i], "B", false)); + bigTableKeyExpr.add( + new ExprNodeColumnDesc( + testDesc.bigTableKeyTypeInfos[i], + testDesc.bigTableKeyColumnNames[i], "B", false)); } Map> keyMap = new HashMap>(); - keyMap.put((byte)0, keyExpr); + keyMap.put((byte) 0, bigTableKeyExpr); + + mapJoinDesc.setFullOuterIntersect(isFullOuterIntersect); + + // Big Table expression includes all columns -- keys and extra (value) columns. + // UNDONE: Assumes all values retained... 
+ List bigTableExpr = new ArrayList(); + for (int i = 0; i < testDesc.bigTableColumnNames.length; i++) { + bigTableExpr.add( + new ExprNodeColumnDesc( + testDesc.bigTableTypeInfos[i], + testDesc.bigTableColumnNames[i], "B", false)); + } + + Map> exprMap = new HashMap>(); + exprMap.put((byte) 0, bigTableExpr); + + List smallTableKeyExpr = new ArrayList(); + + for (int i = 0; i < testDesc.smallTableKeyTypeInfos.length; i++) { + ExprNodeColumnDesc exprNodeColumnDesc = + new ExprNodeColumnDesc( + testDesc.smallTableKeyTypeInfos[i], + testDesc.smallTableKeyColumnNames[i], "S", false); + smallTableKeyExpr.add(exprNodeColumnDesc); + } + // Retained Small Table keys and values. List smallTableExpr = new ArrayList(); - for (int i = 0; i < testDesc.smallTableValueColumnNames.length; i++) { - smallTableExpr.add(new ExprNodeColumnDesc(testDesc.smallTableValueTypeInfos[i], testDesc.smallTableValueColumnNames[i], "S", false)); + final int smallTableRetainKeySize = testDesc.smallTableRetainKeyColumnNums.length; + for (int i = 0; i < smallTableRetainKeySize; i++) { + int smallTableKeyColumnNum = testDesc.smallTableRetainKeyColumnNums[i]; + smallTableExpr.add( + new ExprNodeColumnDesc( + testDesc.smallTableTypeInfos[smallTableKeyColumnNum], + testDesc.smallTableColumnNames[smallTableKeyColumnNum], "S", false)); + } + + final int smallTableRetainValueSize = testDesc.smallTableRetainValueColumnNums.length; + for (int i = 0; i < smallTableRetainValueSize; i++) { + int smallTableValueColumnNum = + smallTableRetainKeySize + testDesc.smallTableRetainValueColumnNums[i]; + smallTableExpr.add( + new ExprNodeColumnDesc( + testDesc.smallTableTypeInfos[smallTableValueColumnNum], + testDesc.smallTableColumnNames[smallTableValueColumnNum], "S", false)); } - keyMap.put((byte)1, smallTableExpr); + + keyMap.put((byte) 1, smallTableKeyExpr); + exprMap.put((byte) 1, smallTableExpr); mapJoinDesc.setKeys(keyMap); - mapJoinDesc.setExprs(keyMap); + mapJoinDesc.setExprs(exprMap); Byte[] order = new Byte[] {(byte) 0, (byte) 1}; mapJoinDesc.setTagOrder(order); - mapJoinDesc.setNoOuterJoin(testDesc.vectorMapJoinVariation != VectorMapJoinVariation.OUTER); + mapJoinDesc.setNoOuterJoin( + testDesc.vectorMapJoinVariation != VectorMapJoinVariation.OUTER && + testDesc.vectorMapJoinVariation != VectorMapJoinVariation.FULL_OUTER); Map> filterMap = new HashMap>(); filterMap.put((byte) 0, new ArrayList()); // None. mapJoinDesc.setFilters(filterMap); List bigTableRetainColumnNumsList = intArrayToList(testDesc.bigTableRetainColumnNums); - - // For now, just small table values... - List smallTableRetainColumnNumsList = intArrayToList(testDesc.smallTableRetainValueColumnNums); - Map> retainListMap = new HashMap>(); retainListMap.put((byte) 0, bigTableRetainColumnNumsList); - retainListMap.put((byte) 1, smallTableRetainColumnNumsList); + + // For now, just small table keys/values... + if (testDesc.smallTableRetainKeyColumnNums.length == 0) { + + // Just the value columns numbers with retain. + List smallTableValueRetainColumnNumsList = + intArrayToList(testDesc.smallTableRetainValueColumnNums); + + retainListMap.put((byte) 1, smallTableValueRetainColumnNumsList); + } else { + + // Both the key/value columns numbers. + + // Zero and above numbers indicate a big table key is needed for + // small table result "area". + + // Negative numbers indicate a column to be (deserialize) read from the small table's + // LazyBinary value row. 
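Put differently, the value-indices convention described in the comments above packs two kinds of references into one int array: a non-negative entry i means "copy Big Table key column i", while a negative entry means "deserialize Small Table value column (-entry - 1) from the LazyBinary row". A minimal sketch of encoding and decoding that convention (hypothetical helper names, not part of the patch); the actual list construction follows below.

public class ValueIndexSketch {

  // Encode: a key column keeps its index; value column i becomes -i - 1.
  static int encodeKeyColumn(int keyColumnNum) {
    return keyColumnNum;
  }

  static int encodeValueColumn(int valueColumnNum) {
    return -valueColumnNum - 1;
  }

  public static void main(String[] args) {
    int[] valueIndices = {
        encodeKeyColumn(0),    //  0 -> Big Table key column 0
        encodeKeyColumn(1),    //  1 -> Big Table key column 1
        encodeValueColumn(0),  // -1 -> Small Table value column 0
        encodeValueColumn(2)   // -3 -> Small Table value column 2
    };

    for (int index : valueIndices) {
      if (index >= 0) {
        System.out.println("output <- big table key column " + index);
      } else {
        System.out.println("output <- small table value column " + (-index - 1));
      }
    }
  }
}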
+ + ArrayList smallTableValueIndicesNumsList = new ArrayList(); + for (int i = 0; i < testDesc.smallTableRetainKeyColumnNums.length; i++) { + smallTableValueIndicesNumsList.add(testDesc.smallTableRetainKeyColumnNums[i]); + } + for (int i = 0; i < testDesc.smallTableRetainValueColumnNums.length; i++) { + smallTableValueIndicesNumsList.add(-testDesc.smallTableRetainValueColumnNums[i] - 1); + } + int[] smallTableValueIndicesNums = + ArrayUtils.toPrimitive(smallTableValueIndicesNumsList.toArray(new Integer[0])); + + Map valueIndicesMap = new HashMap(); + valueIndicesMap.put((byte) 1, smallTableValueIndicesNums); + mapJoinDesc.setValueIndices(valueIndicesMap); + } mapJoinDesc.setRetainList(retainListMap); + switch (testDesc.mapJoinPlanVariation) { + case DYNAMIC_PARTITION_HASH_JOIN: + // FULL OUTER behaves differently for dynamic partition hash join. + mapJoinDesc.setDynamicPartitionHashJoin(true); + break; + case SHARED_SMALL_TABLE: + mapJoinDesc.setDynamicPartitionHashJoin(false); + break; + default: + throw new RuntimeException( + "Unexpected map join plan variation " + testDesc.mapJoinPlanVariation); + } + int joinDescType; switch (testDesc.vectorMapJoinVariation) { case INNER: @@ -141,6 +320,9 @@ public static MapJoinDesc createMapJoinDesc(MapJoinTestDescription testDesc) { case OUTER: joinDescType = JoinDesc.LEFT_OUTER_JOIN; break; + case FULL_OUTER: + joinDescType = JoinDesc.FULL_OUTER_JOIN; + break; default: throw new RuntimeException("unknown operator variation " + testDesc.vectorMapJoinVariation); } @@ -149,12 +331,25 @@ public static MapJoinDesc createMapJoinDesc(MapJoinTestDescription testDesc) { mapJoinDesc.setConds(conds); TableDesc keyTableDesc = PlanUtils.getMapJoinKeyTableDesc(testDesc.hiveConf, PlanUtils - .getFieldSchemasFromColumnList(keyExpr, "")); + .getFieldSchemasFromColumnList(smallTableKeyExpr, "")); mapJoinDesc.setKeyTblDesc(keyTableDesc); + // Small Table expression value columns. + List smallTableValueExpr = new ArrayList(); + + // All Small Table keys and values. + for (int i = 0; i < testDesc.smallTableValueColumnNames.length; i++) { + smallTableValueExpr.add( + new ExprNodeColumnDesc( + testDesc.smallTableValueTypeInfos[i], + testDesc.smallTableValueColumnNames[i], "S", false)); + } + TableDesc valueTableDesc = PlanUtils.getMapJoinValueTableDesc( - PlanUtils.getFieldSchemasFromColumnList(smallTableExpr, "")); + PlanUtils.getFieldSchemasFromColumnList(smallTableValueExpr, "")); ArrayList valueTableDescsList = new ArrayList(); + + // Big Table entry, then Small Table entry.
valueTableDescsList.add(null); valueTableDescsList.add(valueTableDesc); mapJoinDesc.setValueTblDescs(valueTableDescsList); @@ -180,6 +375,7 @@ public static VectorMapJoinDesc createVectorMapJoinDesc(MapJoinTestDescription t hashTableKind = HashTableKind.HASH_SET; break; case OUTER: + case FULL_OUTER: hashTableKind = HashTableKind.HASH_MAP; break; default: @@ -190,9 +386,17 @@ public static VectorMapJoinDesc createVectorMapJoinDesc(MapJoinTestDescription t if (testDesc.bigTableKeyTypeInfos.length == 1) { switch (((PrimitiveTypeInfo) testDesc.bigTableKeyTypeInfos[0]).getPrimitiveCategory()) { case BOOLEAN: + hashTableKeyType = HashTableKeyType.BOOLEAN; + break; case BYTE: + hashTableKeyType = HashTableKeyType.BYTE; + break; case SHORT: + hashTableKeyType = HashTableKeyType.SHORT; + break; case INT: + hashTableKeyType = HashTableKeyType.INT; + break; case LONG: hashTableKeyType = HashTableKeyType.LONG; break; @@ -216,49 +420,112 @@ public static VectorMapJoinDesc createVectorMapJoinDesc(MapJoinTestDescription t vectorDesc.setAllBigTableKeyExpressions(null); - vectorMapJoinInfo.setBigTableValueColumnMap(new int[0]); - vectorMapJoinInfo.setBigTableValueColumnNames(new String[0]); - vectorMapJoinInfo.setBigTableValueTypeInfos(new TypeInfo[0]); + vectorMapJoinInfo.setBigTableValueColumnMap(testDesc.bigTableColumnNums); + vectorMapJoinInfo.setBigTableValueColumnNames(testDesc.bigTableColumnNames); + vectorMapJoinInfo.setBigTableValueTypeInfos(testDesc.bigTableTypeInfos); vectorMapJoinInfo.setSlimmedBigTableValueExpressions(null); vectorDesc.setAllBigTableValueExpressions(null); + vectorMapJoinInfo.setBigTableFilterExpressions(new VectorExpression[0]); + + + /* + * Column mapping. + */ + VectorColumnOutputMapping bigTableRetainMapping = + new VectorColumnOutputMapping("Big Table Retain Mapping"); + + VectorColumnOutputMapping nonOuterSmallTableKeyMapping = + new VectorColumnOutputMapping("Non Outer Small Table Key Key Mapping"); + + VectorColumnOutputMapping outerSmallTableKeyMapping = + new VectorColumnOutputMapping("Outer Small Table Key Mapping"); + + VectorColumnSourceMapping fullOuterSmallTableKeyMapping = + new VectorColumnSourceMapping("Full Outer Small Table Key Mapping"); + VectorColumnSourceMapping projectionMapping = new VectorColumnSourceMapping("Projection Mapping"); + int nextOutputColumn = 0; - VectorColumnOutputMapping bigTableRetainedMapping = - new VectorColumnOutputMapping("Big Table Retained Mapping"); - for (int i = 0; i < testDesc.bigTableTypeInfos.length; i++) { - bigTableRetainedMapping.add(i, i, testDesc.bigTableTypeInfos[i]); - projectionMapping.add(i, i, testDesc.bigTableKeyTypeInfos[i]); + final int bigTableRetainedSize = testDesc.bigTableRetainColumnNums.length; + for (int i = 0; i < bigTableRetainedSize; i++) { + final int batchColumnIndex = testDesc.bigTableRetainColumnNums[i]; + TypeInfo typeInfo = testDesc.bigTableTypeInfos[i]; + projectionMapping.add( + nextOutputColumn, batchColumnIndex, typeInfo); + // Collect columns we copy from the big table batch to the overflow batch. + if (!bigTableRetainMapping.containsOutputColumn(batchColumnIndex)) { + + // Tolerate repeated use of a big table column. 
+ bigTableRetainMapping.add(batchColumnIndex, batchColumnIndex, typeInfo); + } + nextOutputColumn++; } - VectorColumnOutputMapping bigTableOuterKeyMapping = - new VectorColumnOutputMapping("Big Table Outer Key Mapping"); + boolean isOuterJoin = + (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER || + testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER); + + int emulateScratchColumn = testDesc.bigTableTypeInfos.length; + + VectorColumnOutputMapping smallTableKeyOutputMapping = + new VectorColumnOutputMapping("Small Table Key Output Mapping"); + final int smallTableKeyRetainSize = testDesc.smallTableRetainKeyColumnNums.length; + for (int i = 0; i < testDesc.smallTableRetainKeyColumnNums.length; i++) { + final int smallTableKeyColumnNum = testDesc.smallTableRetainKeyColumnNums[i]; + final int bigTableKeyColumnNum = testDesc.bigTableKeyColumnNums[smallTableKeyColumnNum]; + TypeInfo keyTypeInfo = testDesc.smallTableKeyTypeInfos[smallTableKeyColumnNum]; + if (!isOuterJoin) { + // Project the big table key into the small table result "area". + projectionMapping.add(nextOutputColumn, bigTableKeyColumnNum, keyTypeInfo); + if (!bigTableRetainMapping.containsOutputColumn(bigTableKeyColumnNum)) { + nonOuterSmallTableKeyMapping.add(bigTableKeyColumnNum, bigTableKeyColumnNum, keyTypeInfo); + } + } else { + outerSmallTableKeyMapping.add(bigTableKeyColumnNum, emulateScratchColumn, keyTypeInfo); + projectionMapping.add(nextOutputColumn, emulateScratchColumn, keyTypeInfo); + + // For FULL OUTER MapJoin, we need to be able to deserialize a Small Table key + // into the output result. + fullOuterSmallTableKeyMapping.add(smallTableKeyColumnNum, emulateScratchColumn, keyTypeInfo); + emulateScratchColumn++; + } + nextOutputColumn++; + } // The order of the fields in the LazyBinary small table value must be used, so // we use the source ordering flavor for the mapping. - VectorColumnSourceMapping smallTableMapping = - new VectorColumnSourceMapping("Small Table Mapping"); - int outputColumn = testDesc.bigTableTypeInfos.length; + VectorColumnSourceMapping smallTableValueMapping = + new VectorColumnSourceMapping("Small Table Value Mapping"); for (int i = 0; i < testDesc.smallTableValueTypeInfos.length; i++) { - smallTableMapping.add(i, outputColumn, testDesc.smallTableValueTypeInfos[i]); - projectionMapping.add(outputColumn, outputColumn, testDesc.smallTableValueTypeInfos[i]); - outputColumn++; + smallTableValueMapping.add(i, emulateScratchColumn, testDesc.smallTableValueTypeInfos[i]); + projectionMapping.add(nextOutputColumn, emulateScratchColumn, testDesc.smallTableValueTypeInfos[i]); + emulateScratchColumn++; + nextOutputColumn++; } // Convert dynamic arrays and maps to simple arrays. 
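Taken together, the loops above lay out the operator's output row as: the retained Big Table batch columns first, then (for OUTER and FULL OUTER) the Small Table keys materialized in scratch columns that start right after the Big Table columns, and finally the Small Table values in further scratch columns. A runnable sketch of that layout under assumed column counts (the counts and names are illustrative only):

public class ProjectionLayoutSketch {

  public static void main(String[] args) {
    // Assumed shapes: a two-column Big Table (both columns retained),
    // one retained Small Table key (OUTER variation) and two Small Table values.
    int[] bigTableRetainColumnNums = {0, 1};
    int bigTableColumnCount = 2;
    int smallTableRetainKeyCount = 1;
    int smallTableValueCount = 2;

    int nextOutputColumn = 0;
    // Scratch columns are emulated right after the Big Table batch columns.
    int emulateScratchColumn = bigTableColumnCount;

    for (int batchColumnIndex : bigTableRetainColumnNums) {
      System.out.println("output " + nextOutputColumn++
          + " <- big table batch column " + batchColumnIndex);
    }
    for (int i = 0; i < smallTableRetainKeyCount; i++) {
      System.out.println("output " + nextOutputColumn++
          + " <- small table key in scratch column " + emulateScratchColumn++);
    }
    for (int i = 0; i < smallTableValueCount; i++) {
      System.out.println("output " + nextOutputColumn++
          + " <- small table value in scratch column " + emulateScratchColumn++);
    }
    // Outputs 0..4 map to batch columns 0, 1 and scratch columns 2, 3, 4.
  }
}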
- bigTableRetainedMapping.finalize(); + bigTableRetainMapping.finalize(); + vectorMapJoinInfo.setBigTableRetainColumnMap(bigTableRetainMapping.getOutputColumns()); + vectorMapJoinInfo.setBigTableRetainTypeInfos(bigTableRetainMapping.getTypeInfos()); + + nonOuterSmallTableKeyMapping.finalize(); + vectorMapJoinInfo.setNonOuterSmallTableKeyColumnMap(nonOuterSmallTableKeyMapping.getOutputColumns()); + vectorMapJoinInfo.setNonOuterSmallTableKeyTypeInfos(nonOuterSmallTableKeyMapping.getTypeInfos()); - bigTableOuterKeyMapping.finalize(); + outerSmallTableKeyMapping.finalize(); + fullOuterSmallTableKeyMapping.finalize(); - smallTableMapping.finalize(); + vectorMapJoinInfo.setOuterSmallTableKeyMapping(outerSmallTableKeyMapping); + vectorMapJoinInfo.setFullOuterSmallTableKeyMapping(fullOuterSmallTableKeyMapping); - vectorMapJoinInfo.setBigTableRetainedMapping(bigTableRetainedMapping); - vectorMapJoinInfo.setBigTableOuterKeyMapping(bigTableOuterKeyMapping); - vectorMapJoinInfo.setSmallTableMapping(smallTableMapping); + smallTableValueMapping.finalize(); + + vectorMapJoinInfo.setSmallTableValueMapping(smallTableValueMapping); projectionMapping.finalize(); @@ -267,7 +534,9 @@ public static VectorMapJoinDesc createVectorMapJoinDesc(MapJoinTestDescription t vectorMapJoinInfo.setProjectionMapping(projectionMapping); - assert projectionMapping.getCount() == testDesc.outputColumnNames.length; + if (projectionMapping.getCount() != testDesc.outputColumnNames.length) { + throw new RuntimeException(); + }; vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo); @@ -306,6 +575,17 @@ public static VectorMapJoinCommonOperator createNativeVectorMapJoinOperator( new VectorMapJoinOuterLongOperator(new CompilationOpContext(), mapJoinDesc, vContext, vectorDesc); break; + case FULL_OUTER: + if (mapJoinDesc.isFullOuterIntersect()) { + operator = + new VectorMapJoinFullOuterIntersectLongOperator(new CompilationOpContext(), + mapJoinDesc, vContext, vectorDesc); + } else { + operator = + new VectorMapJoinFullOuterLongOperator(new CompilationOpContext(), + mapJoinDesc, vContext, vectorDesc); + } + break; default: throw new RuntimeException("unknown operator variation " + VectorMapJoinVariation); } @@ -331,6 +611,16 @@ public static VectorMapJoinCommonOperator createNativeVectorMapJoinOperator( operator = new VectorMapJoinOuterStringOperator(new CompilationOpContext(), mapJoinDesc, vContext, vectorDesc); + case FULL_OUTER: + if (mapJoinDesc.isFullOuterIntersect()) { + operator = + new VectorMapJoinFullOuterIntersectStringOperator(new CompilationOpContext(), + mapJoinDesc, vContext, vectorDesc); + } else { + operator = + new VectorMapJoinFullOuterStringOperator(new CompilationOpContext(), + mapJoinDesc, vContext, vectorDesc); + } break; default: throw new RuntimeException("unknown operator variation " + VectorMapJoinVariation); @@ -358,6 +648,17 @@ public static VectorMapJoinCommonOperator createNativeVectorMapJoinOperator( new VectorMapJoinOuterMultiKeyOperator(new CompilationOpContext(), mapJoinDesc, vContext, vectorDesc); break; + case FULL_OUTER: + if (mapJoinDesc.isFullOuterIntersect()) { + operator = + new VectorMapJoinFullOuterIntersectMultiKeyOperator(new CompilationOpContext(), + mapJoinDesc, vContext, vectorDesc); + } else { + operator = + new VectorMapJoinFullOuterMultiKeyOperator(new CompilationOpContext(), + mapJoinDesc, vContext, vectorDesc); + } + break; default: throw new RuntimeException("unknown operator variation " + VectorMapJoinVariation); } @@ -365,16 +666,31 @@ public static VectorMapJoinCommonOperator 
createNativeVectorMapJoinOperator( default: throw new RuntimeException("Unknown hash table key type " + vectorDesc.getHashTableKeyType()); } + System.out.println("*BENCHMARK* createNativeVectorMapJoinOperator " + + operator.getClass().getSimpleName()); return operator; } public static VectorizationContext createVectorizationContext(MapJoinTestDescription testDesc) throws HiveException { VectorizationContext vContext = - new VectorizationContext("test", testDesc.bigTableColumnNamesList); + new VectorizationContext("test", testDesc.bigTableColumnNameList); + + boolean isOuterJoin = + (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER || + testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER); + + if (isOuterJoin) { + + // We need physical columns. + for (int i = 0; i < testDesc.smallTableRetainKeyColumnNums.length; i++) { + final int smallTableKeyRetainColumnNum = testDesc.smallTableRetainKeyColumnNums[i]; + vContext.allocateScratchColumn(testDesc.smallTableKeyTypeInfos[smallTableKeyRetainColumnNum]); + } + } // Create scratch columns to hold small table results. - for (int i = 0; i < testDesc.smallTableValueTypeInfos.length; i++) { + for (int i = 0; i < testDesc.smallTableRetainValueColumnNums.length; i++) { vContext.allocateScratchColumn(testDesc.smallTableValueTypeInfos[i]); } return vContext; @@ -390,19 +706,19 @@ public static MapJoinTableContainerSerDe createMapJoinTableContainerSerDe(MapJoi final Byte smallTablePos = 1; - // UNDONE: Why do we need to specify BinarySortableSerDe explicitly here??? TableDesc keyTableDesc = mapJoinDesc.getKeyTblDesc(); AbstractSerDe keySerializer = (AbstractSerDe) ReflectionUtil.newInstance( BinarySortableSerDe.class, null); SerDeUtils.initializeSerDe(keySerializer, null, keyTableDesc.getProperties(), null); MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerializer, false); - TableDesc valueTableDesc; + final List valueTableDescList; if (mapJoinDesc.getNoOuterJoin()) { - valueTableDesc = mapJoinDesc.getValueTblDescs().get(smallTablePos); + valueTableDescList = mapJoinDesc.getValueTblDescs(); } else { - valueTableDesc = mapJoinDesc.getValueFilteredTblDescs().get(smallTablePos); + valueTableDescList = mapJoinDesc.getValueFilteredTblDescs(); } + TableDesc valueTableDesc = valueTableDescList.get(smallTablePos); AbstractSerDe valueSerDe = (AbstractSerDe) ReflectionUtil.newInstance( valueTableDesc.getDeserializerClass(), null); SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null); @@ -414,16 +730,19 @@ public static MapJoinTableContainerSerDe createMapJoinTableContainerSerDe(MapJoi } public static void connectOperators( - MapJoinTestDescription testDesc, Operator operator, - Operator testCollectorOperator) throws HiveException { - Operator[] parents = new Operator[] {operator}; - testCollectorOperator.setParentOperators(Arrays.asList(parents)); - Operator[] childOperators = new Operator[] {testCollectorOperator}; - operator.setChildOperators(Arrays.asList(childOperators)); - HiveConf.setBoolVar(testDesc.hiveConf, - HiveConf.ConfVars.HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD, true); - operator.initialize(testDesc.hiveConf, testDesc.inputObjectInspectors); + Operator childOperator) throws HiveException { + + List> newParentOperators = newOperatorList(); + newParentOperators.addAll(childOperator.getParentOperators()); + newParentOperators.add(operator); + childOperator.setParentOperators(newParentOperators); + + List> newChildOperators = newOperatorList(); + 
newChildOperators.addAll(operator.getChildOperators()); + newChildOperators.add(childOperator); + operator.setChildOperators(newChildOperators); + } private static List intArrayToList(int[] intArray) { @@ -509,9 +828,25 @@ private static void loadTableContainerData(MapJoinTestDescription testDesc, MapJ mapJoinTableContainer.seal(); } - public static MapJoinOperator createMapJoin(MapJoinTestDescription testDesc, - Operator collectorOperator, MapJoinTestData testData, - MapJoinDesc mapJoinDesc, boolean isVectorMapJoin, boolean isOriginalMapJoin) + public static class CreateMapJoinResult { + public final MapJoinOperator mapJoinOperator; + public final MapJoinTableContainer mapJoinTableContainer; + public final MapJoinTableContainerSerDe mapJoinTableContainerSerDe; + + public CreateMapJoinResult( + MapJoinOperator mapJoinOperator, + MapJoinTableContainer mapJoinTableContainer, + MapJoinTableContainerSerDe mapJoinTableContainerSerDe) { + this.mapJoinOperator = mapJoinOperator; + this.mapJoinTableContainer = mapJoinTableContainer; + this.mapJoinTableContainerSerDe = mapJoinTableContainerSerDe; + } + } + public static CreateMapJoinResult createMapJoin( + MapJoinTestDescription testDesc, + MapJoinTestData testData, + MapJoinDesc mapJoinDesc, boolean isVectorMapJoin, boolean isOriginalMapJoin, + MapJoinTableContainer shareMapJoinTableContainer) throws SerDeException, IOException, HiveException { final Byte bigTablePos = 0; @@ -539,11 +874,16 @@ public static MapJoinOperator createMapJoin(MapJoinTestDescription testDesc, operator = new MapJoinOperator(new CompilationOpContext()); operator.setConf(mapJoinDesc); } else { - VectorizationContext vContext = new VectorizationContext("test", testDesc.bigTableColumnNamesList); + VectorizationContext vContext = + new VectorizationContext("test", testDesc.bigTableColumnNameList); + + /* + // UNDONE: Unclear whether this belongs in the input VectorizationContext... // Create scratch columns to hold small table results. for (int i = 0; i < testDesc.smallTableValueTypeInfos.length; i++) { vContext.allocateScratchColumn(testDesc.smallTableValueTypeInfos[i]); } + */ // This is what the Vectorizer class does.
VectorMapJoinDesc vectorMapJoinDesc = new VectorMapJoinDesc(); @@ -571,21 +911,20 @@ public static MapJoinOperator createMapJoin(MapJoinTestDescription testDesc, } } - MapJoinTestConfig.connectOperators(testDesc, operator, collectorOperator); - - operator.setTestMapJoinTableContainer(1, mapJoinTableContainer, mapJoinTableContainerSerDe); + HiveConf.setBoolVar(testDesc.hiveConf, + HiveConf.ConfVars.HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD, true); - return operator; + return new CreateMapJoinResult(operator, mapJoinTableContainer, mapJoinTableContainerSerDe); } - public static MapJoinOperator createNativeVectorMapJoin(MapJoinTestDescription testDesc, - Operator collectorOperator, MapJoinTestData testData, - MapJoinDesc mapJoinDesc, HashTableImplementationType hashTableImplementationType) + public static CreateMapJoinResult createNativeVectorMapJoin( + MapJoinTestDescription testDesc, + MapJoinTestData testData, + MapJoinDesc mapJoinDesc, HashTableImplementationType hashTableImplementationType, + MapJoinTableContainer shareMapJoinTableContainer) throws SerDeException, IOException, HiveException { VectorMapJoinDesc vectorDesc = MapJoinTestConfig.createVectorMapJoinDesc(testDesc); - - // UNDONE mapJoinDesc.setVectorDesc(vectorDesc); vectorDesc.setHashTableImplementationType(hashTableImplementationType); @@ -593,13 +932,14 @@ public static MapJoinOperator createNativeVectorMapJoin(MapJoinTestDescription t VectorMapJoinInfo vectorMapJoinInfo = vectorDesc.getVectorMapJoinInfo(); MapJoinTableContainer mapJoinTableContainer; + MapJoinTableContainerSerDe mapJoinTableContainerSerDe = null; switch (vectorDesc.getHashTableImplementationType()) { case OPTIMIZED: mapJoinTableContainer = new MapJoinBytesTableContainer( testDesc.hiveConf, null, testData.smallTableKeyHashMap.size(), 0); - MapJoinTableContainerSerDe mapJoinTableContainerSerDe = + mapJoinTableContainerSerDe = MapJoinTestConfig.createMapJoinTableContainerSerDe(mapJoinDesc); mapJoinTableContainer.setSerde( @@ -615,7 +955,11 @@ public static MapJoinOperator createNativeVectorMapJoin(MapJoinTestDescription t throw new RuntimeException("Unexpected hash table implementation type " + vectorDesc.getHashTableImplementationType()); } - loadTableContainerData(testDesc, testData, mapJoinTableContainer); +// if (shareMapJoinTableContainer == null) { + loadTableContainerData(testDesc, testData, mapJoinTableContainer); +// } else { +// setTableContainerData(mapJoinTableContainer, shareMapJoinTableContainer); +// } VectorizationContext vContext = MapJoinTestConfig.createVectorizationContext(testDesc); @@ -636,56 +980,295 @@ public static MapJoinOperator createNativeVectorMapJoin(MapJoinTestDescription t vectorDesc, vContext); - MapJoinTestConfig.connectOperators(testDesc, operator, collectorOperator); + HiveConf.setBoolVar(testDesc.hiveConf, + HiveConf.ConfVars.HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD, true); - operator.setTestMapJoinTableContainer(1, mapJoinTableContainer, null); + return new CreateMapJoinResult(operator, mapJoinTableContainer, mapJoinTableContainerSerDe); + } - return operator; + public static CreateMapJoinResult createMapJoinImplementation( + MapJoinTestImplementation mapJoinImplementation, + MapJoinTestDescription testDesc, + MapJoinTestData testData, + MapJoinDesc mapJoinDesc) + throws SerDeException, IOException, HiveException { + return createMapJoinImplementation( + mapJoinImplementation, testDesc, testData, mapJoinDesc, null); } - public static MapJoinOperator createMapJoinImplementation(MapJoinTestImplementation mapJoinImplementation, 
+ public static CreateMapJoinResult createMapJoinImplementation( + MapJoinTestImplementation mapJoinImplementation, MapJoinTestDescription testDesc, - Operator testCollectorOperator, MapJoinTestData testData, - MapJoinDesc mapJoinDesc) throws SerDeException, IOException, HiveException { + MapJoinTestData testData, + MapJoinDesc mapJoinDesc, + MapJoinTableContainer shareMapJoinTableContainer) + throws SerDeException, IOException, HiveException { - MapJoinOperator operator; + CreateMapJoinResult result; switch (mapJoinImplementation) { case ROW_MODE_HASH_MAP: // MapJoinOperator - operator = MapJoinTestConfig.createMapJoin( - testDesc, testCollectorOperator, testData, mapJoinDesc, /* isVectorMapJoin */ false, - /* isOriginalMapJoin */ true); + result = MapJoinTestConfig.createMapJoin( + testDesc, testData, mapJoinDesc, /* isVectorMapJoin */ false, + /* isOriginalMapJoin */ true, + shareMapJoinTableContainer); break; case ROW_MODE_OPTIMIZED: // MapJoinOperator - operator = MapJoinTestConfig.createMapJoin( - testDesc, testCollectorOperator, testData, mapJoinDesc, /* isVectorMapJoin */ false, - /* isOriginalMapJoin */ false); + result = MapJoinTestConfig.createMapJoin( + testDesc, testData, mapJoinDesc, /* isVectorMapJoin */ false, + /* isOriginalMapJoin */ false, + shareMapJoinTableContainer); break; case VECTOR_PASS_THROUGH: // VectorMapJoinOperator - operator = MapJoinTestConfig.createMapJoin( - testDesc, testCollectorOperator, testData, mapJoinDesc, /* isVectorMapJoin */ true, - /* n/a */ false); + result = MapJoinTestConfig.createMapJoin( + testDesc, testData, mapJoinDesc, /* isVectorMapJoin */ true, + /* n/a */ false, + shareMapJoinTableContainer); break; case NATIVE_VECTOR_OPTIMIZED: - operator = MapJoinTestConfig.createNativeVectorMapJoin( - testDesc, testCollectorOperator, testData, mapJoinDesc, HashTableImplementationType.OPTIMIZED); + result = MapJoinTestConfig.createNativeVectorMapJoin( + testDesc, testData, mapJoinDesc, + HashTableImplementationType.OPTIMIZED, + shareMapJoinTableContainer); break; case NATIVE_VECTOR_FAST: - operator = MapJoinTestConfig.createNativeVectorMapJoin( - testDesc, testCollectorOperator, testData, mapJoinDesc, HashTableImplementationType.FAST); + result = MapJoinTestConfig.createNativeVectorMapJoin( + testDesc, testData, mapJoinDesc, + HashTableImplementationType.FAST, + shareMapJoinTableContainer); break; default: throw new RuntimeException("Unexpected MapJoin Operator Implementation " + mapJoinImplementation); } - return operator; + return result; + } + + private static Operator makeInterceptSelectOperator( + MapJoinOperator mapJoinOperator, int bigTableKeySize, int bigTableRetainSize, + String[] outputColumnNames, TypeInfo[] outputTypeInfos) { + + MapJoinDesc mapJoinDesc = (MapJoinDesc) mapJoinOperator.getConf(); + + List selectExprList = new ArrayList(); + List selectOutputColumnNameList = new ArrayList(); + for (int i = 0; i < bigTableRetainSize; i++) { + String selectOutputColumnName = "_col" + i; + selectOutputColumnNameList.add(selectOutputColumnName); + + TypeInfo outputTypeInfo = outputTypeInfos[i]; + if (i < bigTableKeySize) { + + // Big Table key. + ExprNodeColumnDesc keyColumnExpr = + new ExprNodeColumnDesc( + outputTypeInfo, + outputColumnNames[i], "test", false); + selectExprList.add(keyColumnExpr); + } else { + + // For row-mode, substitute NULL constant for any non-key extra Big Table columns. 
+ ExprNodeConstantDesc nullExtraColumnExpr = + new ExprNodeConstantDesc( + outputTypeInfo, + null); + nullExtraColumnExpr.setFoldedFromCol(outputColumnNames[i]); + selectExprList.add(nullExtraColumnExpr); + } + } + + SelectDesc selectDesc = new SelectDesc(selectExprList, selectOutputColumnNameList); + Operator selectOperator = + OperatorFactory.get(new CompilationOpContext(), selectDesc); + + return selectOperator; + } + + private static Operator vectorizeInterceptSelectOperator( + MapJoinOperator mapJoinOperator, int bigTableKeySize, int bigTableRetainSize, + Operator selectOperator) throws HiveException{ + + MapJoinDesc mapJoinDesc = (MapJoinDesc) mapJoinOperator.getConf(); + + VectorizationContext vOutContext = + ((VectorizationContextRegion) mapJoinOperator).getOutputVectorizationContext(); + + SelectDesc selectDesc = (SelectDesc) selectOperator.getConf(); + List selectExprs = selectDesc.getColList(); + + VectorExpression[] selectVectorExpr = new VectorExpression[bigTableRetainSize]; + for (int i = 0; i < bigTableRetainSize; i++) { + + TypeInfo typeInfo = selectExprs.get(i).getTypeInfo(); + if (i < bigTableKeySize) { + + // Big Table key. + selectVectorExpr[i] = vOutContext.getVectorExpression(selectExprs.get(i)); + } else { + + // For vector-mode, for test purposes we substitute a NO-OP (we don't want to modify + // the batch). + + // FULL OUTER INTERCEPT does not look at non-key columns. + + NoOpExpression noOpExpression = new NoOpExpression(i); + + noOpExpression.setInputTypeInfos(typeInfo); + noOpExpression.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE); + + noOpExpression.setOutputTypeInfo(typeInfo); + noOpExpression.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE); + + selectVectorExpr[i] = noOpExpression; + } + } + + System.out.println("*BENCHMARK* VectorSelectOperator selectVectorExpr " + + Arrays.toString(selectVectorExpr)); + + int[] projectedColumns = + ArrayUtils.toPrimitive( + vOutContext.getProjectedColumns().subList(0, bigTableRetainSize). + toArray(new Integer[0])); + System.out.println("*BENCHMARK* VectorSelectOperator projectedColumns " + + Arrays.toString(projectedColumns)); + + VectorSelectDesc vectorSelectDesc = new VectorSelectDesc(); + vectorSelectDesc.setSelectExpressions(selectVectorExpr); + vectorSelectDesc.setProjectedOutputColumns(projectedColumns); + + Operator vectorSelectOperator = OperatorFactory.getVectorOperator( + selectOperator.getCompilationOpContext(), selectDesc, + vOutContext, vectorSelectDesc); + + return vectorSelectOperator; + } + + public static CountCollectorTestOperator addFullOuterIntercept( + MapJoinTestImplementation mapJoinImplementation, + MapJoinTestDescription testDesc, + RowTestObjectsMultiSet outputTestRowMultiSet, MapJoinTestData testData, + MapJoinOperator mapJoinOperator, MapJoinTableContainer mapJoinTableContainer, + MapJoinTableContainerSerDe mapJoinTableContainerSerDe) + throws SerDeException, IOException, HiveException { + + MapJoinDesc mapJoinDesc = (MapJoinDesc) mapJoinOperator.getConf(); + + // For FULL OUTER MapJoin, we require all Big Keys to be present in the output result. + // The first N output columns are the Big Table key columns. 
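For FULL OUTER, the property this intercept pipeline ultimately checks reduces to a set invariant: the distinct keys in the join output must be the union of the Big Table and Small Table key sets, since non-matches from both sides are emitted. A plain-Java illustration of that invariant with made-up keys (no Hive types involved):

import java.util.Arrays;
import java.util.TreeSet;

public class FullOuterKeySketch {

  public static void main(String[] args) {
    TreeSet<Long> bigTableKeys = new TreeSet<>(Arrays.asList(1L, 2L, 3L));
    TreeSet<Long> smallTableKeys = new TreeSet<>(Arrays.asList(3L, 4L));

    // FULL OUTER JOIN emits matched rows plus non-matches from both sides,
    // so the distinct keys of the result are the union of the two inputs.
    TreeSet<Long> expectedResultKeys = new TreeSet<>(bigTableKeys);
    expectedResultKeys.addAll(smallTableKeys);

    System.out.println(expectedResultKeys);  // prints [1, 2, 3, 4]
  }
}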
+ Map> keyMap = mapJoinDesc.getKeys(); + List bigTableKeyExprs = keyMap.get((byte) 0); + final int bigTableKeySize = bigTableKeyExprs.size(); + + Map> retainMap = mapJoinDesc.getRetainList(); + List bigTableRetainList = retainMap.get((byte) 0); + final int bigTableRetainSize = bigTableRetainList.size(); + + List outputColumnNameList = mapJoinDesc.getOutputColumnNames(); + String[] mapJoinOutputColumnNames = outputColumnNameList.toArray(new String[0]); + + // Use a utility method to get the MapJoin output TypeInfo. + TypeInfo[] mapJoinOutputTypeInfos = VectorMapJoinBaseOperator.getOutputTypeInfos(mapJoinDesc); + + final boolean isVectorOutput = MapJoinTestConfig.isVectorOutput(mapJoinImplementation); + + /* + * Always create a row-mode SelectOperator. If we are vector-mode, next we will use its + * expressions and replace it with a VectorSelectOperator. + */ + Operator selectOperator = + makeInterceptSelectOperator( + mapJoinOperator, bigTableKeySize, bigTableRetainSize, + mapJoinOutputColumnNames, mapJoinOutputTypeInfos); + + List selectOutputColumnNameList = + ((SelectDesc) selectOperator.getConf()).getOutputColumnNames(); + String[] selectOutputColumnNames = + selectOutputColumnNameList.toArray(new String[0]); + + if (isVectorOutput) { + selectOperator = + vectorizeInterceptSelectOperator( + mapJoinOperator, bigTableKeySize, bigTableRetainSize, selectOperator); + } + + /* + * Create test description just for FULL OUTER INTERCEPT with different + */ + MapJoinTestDescription interceptTestDesc = + new MapJoinTestDescription( + testDesc.hiveConf, testDesc.vectorMapJoinVariation, + selectOutputColumnNames, + Arrays.copyOf(mapJoinOutputTypeInfos, bigTableRetainSize), + testDesc.bigTableKeyColumnNums, + testDesc.smallTableValueTypeInfos, + testDesc.smallTableRetainKeyColumnNums, + testDesc.smallTableGenerationParameters, + testDesc.mapJoinPlanVariation); + + MapJoinDesc intersectMapJoinDesc = + createMapJoinDesc(interceptTestDesc, /* isFullOuterIntersect */ true); + + /* + * Create FULL OUTER INTERSECT MapJoin operator. + */ + CreateMapJoinResult interceptCreateMapJoinResult = + createMapJoinImplementation( + mapJoinImplementation, interceptTestDesc, testData, intersectMapJoinDesc); + MapJoinOperator intersectMapJoinOperator = + interceptCreateMapJoinResult.mapJoinOperator; + MapJoinTableContainer intersectMapJoinTableContainer = + interceptCreateMapJoinResult.mapJoinTableContainer; + MapJoinTableContainerSerDe interceptMapJoinTableContainerSerDe = + interceptCreateMapJoinResult.mapJoinTableContainerSerDe; + + connectOperators(mapJoinOperator, selectOperator); + + connectOperators(selectOperator, intersectMapJoinOperator); + + CountCollectorTestOperator interceptTestCollectorOperator; + if (!isVectorOutput) { + interceptTestCollectorOperator = + new TestMultiSetCollectorOperator( + interceptTestDesc.outputObjectInspectors, outputTestRowMultiSet); + } else { + VectorizationContext vContext = + ((VectorizationContextRegion) intersectMapJoinOperator).getOutputVectorizationContext(); + int[] intersectProjectionColumns = + ArrayUtils.toPrimitive(vContext.getProjectedColumns().toArray(new Integer[0])); + interceptTestCollectorOperator = + new TestMultiSetVectorCollectorOperator( + intersectProjectionColumns, + interceptTestDesc.outputTypeInfos, + interceptTestDesc.outputObjectInspectors, outputTestRowMultiSet); + } + + connectOperators(intersectMapJoinOperator, interceptTestCollectorOperator); + + // Setup the FULL OUTER INTERSECT MapJoin's inputObjInspector to include the Small Table, etc. 
+ intersectMapJoinOperator.setInputObjInspectors(interceptTestDesc.inputObjectInspectors); + + // Now, invoke initializeOp methods from the root MapJoin operator. + mapJoinOperator.initialize(testDesc.hiveConf, testDesc.inputObjectInspectors); + + // Fixup the mapJoinTables container references to our test data. + mapJoinOperator.setTestMapJoinTableContainer( + 1, mapJoinTableContainer, mapJoinTableContainerSerDe); + intersectMapJoinOperator.setTestMapJoinTableContainer( + 1, intersectMapJoinTableContainer, interceptMapJoinTableContainerSerDe); + + return interceptTestCollectorOperator; + } + + private static List> newOperatorList() { + return new ArrayList>(); } } \ No newline at end of file diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestData.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestData.java index d763695..bfa7b47 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestData.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestData.java @@ -20,6 +20,7 @@ import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Random; import java.util.Map.Entry; @@ -37,6 +38,7 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters.ValueOption; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; @@ -58,6 +60,8 @@ HashMap smallTableKeyHashMap; + List fullOuterAdditionalSmallTableKeys; + ArrayList smallTableValueCounts; ArrayList> smallTableValues; @@ -68,17 +72,42 @@ public MapJoinTestData(int rowCount, MapJoinTestDescription testDesc, this.smallTableRandomSeed = smallTableRandomSeed; - generateTypes = generateTypesFromTypeInfos(testDesc.bigTableTypeInfos); + boolean isOuterJoin = + (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER || + testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER); + + generateTypes = generateTypesFromTypeInfos( + testDesc.bigTableTypeInfos, + testDesc.bigTableKeyColumnNums.length, + isOuterJoin); generator = new VectorBatchGenerator(generateTypes); bigTableBatch = generator.createBatch(); // Add small table result columns. - ColumnVector[] newCols = new ColumnVector[bigTableBatch.cols.length + testDesc.smallTableValueTypeInfos.length]; + + // Only [FULL] OUTER MapJoin needs a physical column. + final int smallTableRetainKeySize = + (isOuterJoin ? 
testDesc.smallTableRetainKeyColumnNums.length : 0); + ColumnVector[] newCols = + new ColumnVector[ + bigTableBatch.cols.length + + smallTableRetainKeySize + + testDesc.smallTableValueTypeInfos.length]; System.arraycopy(bigTableBatch.cols, 0, newCols, 0, bigTableBatch.cols.length); + int colIndex = bigTableBatch.cols.length; + + if (isOuterJoin) { + for (int s = 0; s < smallTableRetainKeySize; s++) { + final int smallTableKeyColumnNum = testDesc.smallTableRetainKeyColumnNums[s]; + newCols[colIndex++] = + VectorizedBatchUtil.createColumnVector( + testDesc.smallTableKeyTypeInfos[smallTableKeyColumnNum]); + } + } for (int s = 0; s < testDesc.smallTableValueTypeInfos.length; s++) { - newCols[bigTableBatch.cols.length + s] = + newCols[colIndex++] = VectorizedBatchUtil.createColumnVector(testDesc.smallTableValueTypeInfos[s]); } bigTableBatch.cols = newCols; @@ -88,63 +117,83 @@ public MapJoinTestData(int rowCount, MapJoinTestDescription testDesc, bigTableBatchStream = new VectorBatchGenerateStream( bigTableRandomSeed, generator, rowCount); - VectorExtractRow vectorExtractRow = new VectorExtractRow(); - vectorExtractRow.init(testDesc.bigTableKeyTypeInfos); + VectorExtractRow keyVectorExtractRow = new VectorExtractRow(); + keyVectorExtractRow.init(testDesc.bigTableKeyTypeInfos, testDesc.bigTableKeyColumnNums); smallTableGenerationParameters = testDesc.getSmallTableGenerationParameters(); + HashMap bigTableKeyHashMap = new HashMap(); smallTableKeyHashMap = new HashMap(); + Random smallTableRandom = new Random(smallTableRandomSeed); // Start small table random generation // from beginning. ValueOption valueOption = smallTableGenerationParameters.getValueOption(); - int keyOutOfAThousand = smallTableGenerationParameters.getKeyOutOfAThousand(); + if (valueOption != ValueOption.NO_REGULAR_SMALL_KEYS) { + int keyOutOfAThousand = smallTableGenerationParameters.getKeyOutOfAThousand(); - bigTableBatchStream.reset(); - while (bigTableBatchStream.isNext()) { - bigTableBatch.reset(); - bigTableBatchStream.fillNext(bigTableBatch); + bigTableBatchStream.reset(); + while (bigTableBatchStream.isNext()) { + bigTableBatch.reset(); + bigTableBatchStream.fillNext(bigTableBatch); - final int size = bigTableBatch.size; - for (int i = 0; i < size; i++) { + final int size = bigTableBatch.size; + for (int i = 0; i < size; i++) { - if (smallTableRandom.nextInt(1000) <= keyOutOfAThousand) { - - RowTestObjects testKey = getTestKey(bigTableBatch, i, vectorExtractRow, + RowTestObjects testKey = getTestKey(bigTableBatch, i, keyVectorExtractRow, testDesc.bigTableKeyTypeInfos.length, testDesc.bigTableObjectInspectors); + bigTableKeyHashMap.put((RowTestObjects) testKey.clone(), -1); + + if (smallTableRandom.nextInt(1000) <= keyOutOfAThousand) { - if (valueOption == ValueOption.ONLY_ONE) { - if (smallTableKeyHashMap.containsKey(testKey)) { - continue; + if (valueOption == ValueOption.ONLY_ONE) { + if (smallTableKeyHashMap.containsKey(testKey)) { + continue; + } } + smallTableKeyHashMap.put((RowTestObjects) testKey.clone(), -1); } - smallTableKeyHashMap.put((RowTestObjects) testKey.clone(), -1); } } } //--------------------------------------------------------------------------------------------- - // UNDONE: For now, don't add more small keys... - /* - // Add more small table keys that are not in Big Table batches. 
- final int smallTableAdditionalLength = 1 + random.nextInt(4); - final int smallTableAdditionalSize = smallTableAdditionalLength * maxBatchSize; - VectorizedRowBatch[] smallTableAdditionalBatches = createBigTableBatches(generator, smallTableAdditionalLength); - for (int i = 0; i < smallTableAdditionalLength; i++) { - generator.generateBatch(smallTableAdditionalBatches[i], random, maxBatchSize); + // Add more small table keys that are not in Big Table or Small Table for FULL OUTER. + + fullOuterAdditionalSmallTableKeys = new ArrayList(); + + VectorBatchGenerateStream altBigTableBatchStream = + new VectorBatchGenerateStream( + smallTableRandomSeed, generator, 100); + altBigTableBatchStream.reset(); + while (altBigTableBatchStream.isNext()) { + bigTableBatch.reset(); + altBigTableBatchStream.fillNext(bigTableBatch); + final int size = bigTableBatch.size; + for (int i = 0; i < size; i++) { + RowTestObjects testKey = getTestKey(bigTableBatch, i, keyVectorExtractRow, + testDesc.bigTableKeyTypeInfos.length, + testDesc.bigTableObjectInspectors); + if (bigTableKeyHashMap.containsKey(testKey) || + smallTableKeyHashMap.containsKey(testKey)) { + continue; + } + RowTestObjects testKeyClone = (RowTestObjects) testKey.clone(); + smallTableKeyHashMap.put(testKeyClone, -1); + fullOuterAdditionalSmallTableKeys.add(testKeyClone); + } } - TestRow[] additionalTestKeys = getTestKeys(smallTableAdditionalBatches, vectorExtractRow, - testDesc.bigTableKeyTypeInfos.length, testDesc.bigTableObjectInspectors); - final int smallTableAdditionKeyProbes = smallTableAdditionalSize / 2; - for (int i = 0; i < smallTableAdditionKeyProbes; i++) { - int index = random.nextInt(smallTableAdditionalSize); - TestRow additionalTestKey = additionalTestKeys[index]; - smallTableKeyHashMap.put((TestRow) additionalTestKey.clone(), -1); + + // Make sure there is a NULL key. + Object[] nullKeyRowObjects = new Object[testDesc.bigTableKeyTypeInfos.length]; + RowTestObjects nullTestKey = new RowTestObjects(nullKeyRowObjects); + if (!smallTableKeyHashMap.containsKey(nullTestKey)) { + smallTableKeyHashMap.put(nullTestKey, -1); + fullOuterAdditionalSmallTableKeys.add(nullTestKey); } - */ // Number the test rows with collection order. int addCount = 0; @@ -177,9 +226,9 @@ public static void driveBigTableData(MapJoinTestDescription testDesc, MapJoinTes MapJoinOperator operator) throws HiveException { VectorExtractRow vectorExtractRow = new VectorExtractRow(); - vectorExtractRow.init(testDesc.bigTableKeyTypeInfos); + vectorExtractRow.init(testDesc.bigTableTypeInfos); - final int columnCount = testDesc.bigTableKeyTypeInfos.length; + final int columnCount = testDesc.bigTableTypeInfos.length; Object[] row = new Object[columnCount]; testData.bigTableBatchStream.reset(); @@ -194,7 +243,9 @@ public static void driveBigTableData(MapJoinTestDescription testDesc, MapJoinTes operator.process(row, 0); } } - operator.closeOp(false); + + // Close the operator tree. + operator.close(false); } public static void driveVectorBigTableData(MapJoinTestDescription testDesc, MapJoinTestData testData, @@ -207,7 +258,9 @@ public static void driveVectorBigTableData(MapJoinTestDescription testDesc, MapJ operator.process(testData.bigTableBatch, 0); } - operator.closeOp(false); + + // Close the operator tree. 
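+ // Descriptive note (based on Operator.close semantics): close() also closes the child operators, so rows the MapJoin emits while closing, such as FULL OUTER non-match Small Table results, still reach the test collector.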
+ operator.close(false); } public static void generateVariationData(MapJoinTestData testData, @@ -219,6 +272,7 @@ public static void generateVariationData(MapJoinTestData testData, break; case INNER: case OUTER: + case FULL_OUTER: testData.generateRandomSmallTableCounts(testDesc, random); testData.generateRandomSmallTableValues(testDesc, random); break; @@ -230,10 +284,15 @@ public static void generateVariationData(MapJoinTestData testData, private static RowTestObjects generateRandomSmallTableValueRow(MapJoinTestDescription testDesc, Random random) { final int columnCount = testDesc.smallTableValueTypeInfos.length; - Object[] smallTableValueRow = VectorRandomRowSource.randomWritablePrimitiveRow(columnCount, random, - testDesc.smallTableValuePrimitiveTypeInfos); + PrimitiveTypeInfo[] primitiveTypeInfos = new PrimitiveTypeInfo[columnCount]; + for (int i = 0; i < columnCount; i++) { + primitiveTypeInfos[i] = (PrimitiveTypeInfo) testDesc.smallTableValueTypeInfos[i]; + } + Object[] smallTableValueRow = + VectorRandomRowSource.randomWritablePrimitiveRow( + columnCount, random, primitiveTypeInfos); for (int c = 0; c < smallTableValueRow.length; c++) { - smallTableValueRow[c] = ((PrimitiveObjectInspector) testDesc.smallTableObjectInspectors[c]).copyObject(smallTableValueRow[c]); + smallTableValueRow[c] = ((PrimitiveObjectInspector) testDesc.smallTableValueObjectInspectors[c]).copyObject(smallTableValueRow[c]); } return new RowTestObjects(smallTableValueRow); } @@ -241,7 +300,7 @@ private static RowTestObjects generateRandomSmallTableValueRow(MapJoinTestDescri private void generateRandomSmallTableCounts(MapJoinTestDescription testDesc, Random random) { smallTableValueCounts = new ArrayList(); for (Entry testKeyEntry : smallTableKeyHashMap.entrySet()) { - final int valueCount = 1 + random.nextInt(19); + final int valueCount = 1 + random.nextInt(3); smallTableValueCounts.add(valueCount); } } @@ -258,14 +317,26 @@ private void generateRandomSmallTableValues(MapJoinTestDescription testDesc, Ran } } - private static GenerateType[] generateTypesFromTypeInfos(TypeInfo[] typeInfos) { + private static GenerateType[] generateTypesFromTypeInfos(TypeInfo[] typeInfos, + int keyCount, boolean isOuterJoin) { final int size = typeInfos.length; GenerateType[] generateTypes = new GenerateType[size]; for (int i = 0; i < size; i++) { PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfos[i]; GenerateCategory category = - GenerateCategory.generateCategoryFromPrimitiveCategory(primitiveTypeInfo.getPrimitiveCategory()); - generateTypes[i] = new GenerateType(category); + GenerateCategory.generateCategoryFromPrimitiveCategory( + primitiveTypeInfo.getPrimitiveCategory()); + final boolean allowNulls; + if (i >= keyCount) { + + // Value columns can be NULL. + allowNulls = true; + } else { + + // Non-OUTER JOIN operators expect NULL keys to have been filtered out. 
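+ // Key columns may therefore generate NULLs only for the OUTER and FULL OUTER variations.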
+ allowNulls = isOuterJoin; + } + generateTypes[i] = new GenerateType(category, allowNulls); } return generateTypes; } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java index bde4424..15979ad 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -28,19 +29,24 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveWritableObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; public class MapJoinTestDescription extends DescriptionTest { + public static enum MapJoinPlanVariation { + DYNAMIC_PARTITION_HASH_JOIN, + SHARED_SMALL_TABLE + } + public static class SmallTableGenerationParameters { public static enum ValueOption { NO_RESTRICTION, ONLY_ONE, - ONLY_TWO, - AT_LEAST_TWO + NO_REGULAR_SMALL_KEYS } private ValueOption valueOption; @@ -82,70 +88,103 @@ public int getNoMatchKeyOutOfAThousand() { final VectorMapJoinVariation vectorMapJoinVariation; // Adjustable. - public String[] bigTableColumnNames; + public String[] bigTableKeyColumnNames; public TypeInfo[] bigTableTypeInfos; + public int[] bigTableKeyColumnNums; - public String[] smallTableValueColumnNames; + public TypeInfo[] smallTableValueTypeInfos; - public int[] bigTableRetainColumnNums; + public int[] smallTableRetainKeyColumnNums; - public int[] smallTableRetainValueColumnNums; public SmallTableGenerationParameters smallTableGenerationParameters; // Derived. 
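+ // (The derived fields below are populated by computeDerived() from the adjustable fields above.)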
- public List bigTableColumnNamesList; - public String[] bigTableKeyColumnNames; - public TypeInfo[] bigTableKeyTypeInfos; - public List smallTableValueColumnNamesList; + + public int[] bigTableColumnNums; + public String[] bigTableColumnNames; + public List bigTableColumnNameList; public ObjectInspector[] bigTableObjectInspectors; - public List bigTableObjectInspectorsList; + public List bigTableObjectInspectorList; + + public TypeInfo[] bigTableKeyTypeInfos; + + public List smallTableKeyColumnNameList; + public String[] smallTableKeyColumnNames; + public TypeInfo[] smallTableKeyTypeInfos; + public ObjectInspector[] smallTableKeyObjectInspectors; + public List smallTableKeyObjectInspectorList; + + public List smallTableValueColumnNameList; + public String[] smallTableValueColumnNames; + public ObjectInspector[] smallTableValueObjectInspectors; + public List smallTableValueObjectInspectorList; + + public int[] bigTableRetainColumnNums; + public int[] smallTableRetainValueColumnNums; + + public String[] smallTableColumnNames; + public List smallTableColumnNameList; + public TypeInfo[] smallTableTypeInfos; + public List smallTableObjectInspectorList; + public StandardStructObjectInspector bigTableStandardObjectInspector; - public PrimitiveTypeInfo[] smallTableValuePrimitiveTypeInfos; - public ObjectInspector[] smallTableObjectInspectors; - public PrimitiveCategory[] smallTablePrimitiveCategories; - public List smallTableObjectInspectorsList; public StandardStructObjectInspector smallTableStandardObjectInspector; public ObjectInspector[] inputObjectInspectors; + public String[] outputColumnNames; public TypeInfo[] outputTypeInfos; public ObjectInspector[] outputObjectInspectors; + final MapJoinPlanVariation mapJoinPlanVariation; + + public MapJoinTestDescription ( + HiveConf hiveConf, + VectorMapJoinVariation vectorMapJoinVariation, + TypeInfo[] bigTableTypeInfos, + int[] bigTableKeyColumnNums, + TypeInfo[] smallTableValueTypeInfos, + int[] smallTableRetainKeyColumnNums, + SmallTableGenerationParameters smallTableGenerationParameters, + MapJoinPlanVariation mapJoinPlanVariation) { + this( + hiveConf, + vectorMapJoinVariation, + /* bigTableColumnNames */ null, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + } + public MapJoinTestDescription ( HiveConf hiveConf, VectorMapJoinVariation vectorMapJoinVariation, - String[] bigTableColumnNames, TypeInfo[] bigTableTypeInfos, + String[] bigTableColumnNames, + TypeInfo[] bigTableTypeInfos, int[] bigTableKeyColumnNums, - String[] smallTableValueColumnNames, TypeInfo[] smallTableValueTypeInfos, - int[] bigTableRetainColumnNums, - int[] smallTableRetainKeyColumnNums, int[] smallTableRetainValueColumnNums, - SmallTableGenerationParameters smallTableGenerationParameters) { + TypeInfo[] smallTableValueTypeInfos, + int[] smallTableRetainKeyColumnNums, + SmallTableGenerationParameters smallTableGenerationParameters, + MapJoinPlanVariation mapJoinPlanVariation) { super(hiveConf); + this.vectorMapJoinVariation = vectorMapJoinVariation; this.bigTableColumnNames = bigTableColumnNames; this.bigTableTypeInfos = bigTableTypeInfos; this.bigTableKeyColumnNums = bigTableKeyColumnNums; - this.smallTableValueColumnNames = smallTableValueColumnNames; + this.smallTableValueTypeInfos = smallTableValueTypeInfos; - this.bigTableRetainColumnNums = bigTableRetainColumnNums; - this.smallTableRetainKeyColumnNums = smallTableRetainKeyColumnNums; - 
this.smallTableRetainValueColumnNums = smallTableRetainValueColumnNums; + + this.smallTableRetainKeyColumnNums = smallTableRetainKeyColumnNums;; this.smallTableGenerationParameters = smallTableGenerationParameters; - switch (vectorMapJoinVariation) { - case INNER_BIG_ONLY: - case LEFT_SEMI: - trimAwaySmallTableValueInfo(); - break; - case INNER: - case OUTER: - break; - default: - throw new RuntimeException("Unknown operator variation " + vectorMapJoinVariation); - } + this.mapJoinPlanVariation = mapJoinPlanVariation; computeDerived(); } @@ -155,45 +194,121 @@ public SmallTableGenerationParameters getSmallTableGenerationParameters() { } public void computeDerived() { - bigTableColumnNamesList = Arrays.asList(bigTableColumnNames); - bigTableKeyColumnNames = new String[bigTableKeyColumnNums.length]; - bigTableKeyTypeInfos = new TypeInfo[bigTableKeyColumnNums.length]; - for (int i = 0; i < bigTableKeyColumnNums.length; i++) { - bigTableKeyColumnNames[i] = bigTableColumnNames[bigTableKeyColumnNums[i]]; - bigTableKeyTypeInfos[i] = bigTableTypeInfos[bigTableKeyColumnNums[i]]; + final int bigTableSize = bigTableTypeInfos.length; + + if (bigTableKeyColumnNames == null) { + + // Automatically populate. + bigTableColumnNames = new String[bigTableSize]; + for (int i = 0; i < bigTableSize; i++) { + bigTableColumnNames[i] = "_col" + i; + } } - smallTableValueColumnNamesList = Arrays.asList(smallTableValueColumnNames); + // Automatically populate. + bigTableColumnNums = new int[bigTableSize]; - bigTableObjectInspectors = new ObjectInspector[bigTableTypeInfos.length]; - for (int i = 0; i < bigTableTypeInfos.length; i++) { - bigTableObjectInspectors[i] = - PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector((PrimitiveTypeInfo) bigTableTypeInfos[i]); + for (int i = 0; i < bigTableSize; i++) { + bigTableColumnNums[i] = i; } - bigTableObjectInspectorsList = Arrays.asList(bigTableObjectInspectors); - smallTableObjectInspectors = new ObjectInspector[smallTableValueTypeInfos.length]; - smallTablePrimitiveCategories = new PrimitiveCategory[smallTableValueTypeInfos.length]; - smallTableValuePrimitiveTypeInfos = new PrimitiveTypeInfo[smallTableValueTypeInfos.length]; - for (int i = 0; i < smallTableValueTypeInfos.length; i++) { - PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) smallTableValueTypeInfos[i]; - smallTableObjectInspectors[i] = - PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo); - smallTablePrimitiveCategories[i] = primitiveTypeInfo.getPrimitiveCategory(); - smallTableValuePrimitiveTypeInfos[i] = primitiveTypeInfo; + // Automatically populate. + bigTableRetainColumnNums = new int[bigTableSize]; + for (int i = 0; i < bigTableSize; i++) { + bigTableRetainColumnNums[i] = i; } - smallTableObjectInspectorsList = Arrays.asList(smallTableObjectInspectors); + /* + * Big Table key information. + */ + final int keySize = bigTableKeyColumnNums.length; + + bigTableKeyColumnNames = new String[keySize]; + bigTableKeyTypeInfos = new TypeInfo[keySize]; + for (int i = 0; i < keySize; i++) { + final int bigTableKeyColumnNum = bigTableKeyColumnNums[i]; + bigTableKeyColumnNames[i] = bigTableColumnNames[bigTableKeyColumnNum]; + bigTableKeyTypeInfos[i] = bigTableTypeInfos[bigTableKeyColumnNum]; + } + + /* + * Big Table object inspectors. 
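+ * (Writable primitive object inspectors built from the Big Table TypeInfos via PrimitiveObjectInspectorFactory.)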
+ */ + bigTableObjectInspectors = new ObjectInspector[bigTableSize]; + for (int i = 0; i < bigTableSize; i++) { + bigTableObjectInspectors[i] = + PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + (PrimitiveTypeInfo) bigTableTypeInfos[i]); + } + bigTableColumnNameList = Arrays.asList(bigTableColumnNames); + bigTableObjectInspectorList = Arrays.asList(bigTableObjectInspectors); + + /* + * Small Table key object inspectors are derived directly from the Big Table key information. + */ + smallTableKeyColumnNames = new String[keySize]; + smallTableKeyTypeInfos = Arrays.copyOf(bigTableKeyTypeInfos, keySize); + smallTableKeyObjectInspectors = new ObjectInspector[keySize]; + for (int i = 0; i < keySize; i++) { + smallTableKeyColumnNames[i] = "_col" + i; + final int bigTableKeyColumnNum = bigTableKeyColumnNums[i]; + smallTableKeyObjectInspectors[i] = bigTableObjectInspectors[bigTableKeyColumnNum]; + } + smallTableKeyColumnNameList = Arrays.asList(smallTableKeyColumnNames); + smallTableKeyObjectInspectorList = Arrays.asList(smallTableKeyObjectInspectors); + + // First part of Small Table information is the key information. + smallTableColumnNameList = new ArrayList(smallTableKeyColumnNameList); + List smallTableTypeInfoList = + new ArrayList(Arrays.asList(smallTableKeyTypeInfos)); + smallTableObjectInspectorList = new ArrayList(); + smallTableObjectInspectorList.addAll(smallTableKeyObjectInspectorList); + + final int valueSize = smallTableValueTypeInfos.length; + + // Automatically populate. + smallTableValueColumnNames = new String[valueSize]; + for (int i = 0; i < valueSize; i++) { + smallTableValueColumnNames[i] = "_col" + (keySize + i); + } + + smallTableValueObjectInspectors = new ObjectInspector[valueSize]; + for (int i = 0; i < valueSize; i++) { + smallTableValueObjectInspectors[i] = + PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + (PrimitiveTypeInfo) smallTableValueTypeInfos[i]); + } + smallTableValueColumnNameList = Arrays.asList(smallTableValueColumnNames); + smallTableTypeInfoList.addAll(Arrays.asList(smallTableValueTypeInfos)); + smallTableValueObjectInspectorList = Arrays.asList(smallTableValueObjectInspectors); + + smallTableColumnNameList.addAll(smallTableValueColumnNameList); + smallTableColumnNames = smallTableColumnNameList.toArray(new String[0]); + smallTableTypeInfos = smallTableTypeInfoList.toArray(new TypeInfo[0]); + + smallTableObjectInspectorList.addAll(smallTableValueObjectInspectorList); + + /* + * The inputObjectInspectors describe the keys and values of the Big Table and Small Table. + */ bigTableStandardObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector( - bigTableColumnNamesList, Arrays.asList((ObjectInspector[]) bigTableObjectInspectors)); + bigTableColumnNameList, bigTableObjectInspectorList); smallTableStandardObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector( - smallTableValueColumnNamesList, Arrays.asList((ObjectInspector[]) smallTableObjectInspectors)); + smallTableColumnNameList, smallTableObjectInspectorList); inputObjectInspectors = - new ObjectInspector[] { bigTableStandardObjectInspector, smallTableStandardObjectInspector }; + new ObjectInspector[] { + bigTableStandardObjectInspector, smallTableStandardObjectInspector }; + + // For now, we always retain the Small Table values... + // Automatically populate. 
+ smallTableRetainValueColumnNums = new int[valueSize]; + for (int i = 0; i < valueSize; i++) { + smallTableRetainValueColumnNums[i] = i; + } int outputLength = bigTableRetainColumnNums.length + @@ -203,12 +318,13 @@ public void computeDerived() { outputTypeInfos = new TypeInfo[outputLength]; int outputIndex = 0; - for (int i = 0; i < bigTableRetainColumnNums.length; i++) { + final int bigTableRetainSize = bigTableRetainColumnNums.length; + for (int i = 0; i < bigTableRetainSize; i++) { outputTypeInfos[outputIndex++] = bigTableTypeInfos[bigTableRetainColumnNums[i]]; } - // for (int i = 0; i < smallTableRetainKeyColumnNums.length; i++) { - // outputTypeInfos[outputIndex++] = smallTableTypeInfos[smallTableRetainKeyColumnNums[i]]; - // } + for (int i = 0; i < smallTableRetainKeyColumnNums.length; i++) { + outputTypeInfos[outputIndex++] = smallTableKeyTypeInfos[smallTableRetainKeyColumnNums[i]]; + } for (int i = 0; i < smallTableRetainValueColumnNums.length; i++) { outputTypeInfos[outputIndex++] = smallTableValueTypeInfos[smallTableRetainValueColumnNums[i]]; } @@ -221,13 +337,6 @@ public void computeDerived() { } } - public void trimAwaySmallTableValueInfo() { - smallTableValueColumnNames = new String[] {}; - smallTableValueTypeInfos = new TypeInfo[] {}; - smallTableRetainKeyColumnNums = new int[] {}; - smallTableRetainValueColumnNums = new int[] {}; - } - private String[] createOutputColumnNames(int outputColumnCount) { String[] outputColumnNames = new String[outputColumnCount]; int counter = 1; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/NoOpExpression.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/NoOpExpression.java new file mode 100644 index 0000000..fdd0342 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/NoOpExpression.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; + +/** + * An expression representing a column, only children are evaluated. 
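+ * The evaluate method is intentionally a no-op.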
+ */ +public class NoOpExpression extends VectorExpression { + + private static final long serialVersionUID = 1L; + + public NoOpExpression() { + } + + public NoOpExpression(int colNum) { + super(colNum); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + } + + @Override + public String vectorExpressionParameters() { + return "noOpCol" + outputColumnNum + ":" + + getTypeName(outputTypeInfo, outputDataTypePhysicalVariation); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()).build(); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java index 4c41f9c..fccc27b 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java @@ -21,7 +21,9 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang.ArrayUtils; +import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; @@ -50,6 +52,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion; import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; @@ -57,8 +60,13 @@ import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.CreateMapJoinResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.TestMultiSetCollectorOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.TestMultiSetVectorCollectorOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.MapJoinPlanVariation; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters.ValueOption; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastMultiKeyHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow; @@ -93,9 +101,12 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; 
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; import org.apache.hive.common.util.HashCodeUtil; @@ -120,233 +131,1347 @@ public class TestMapJoinOperator { - /* - * This test collector operator is for MapJoin row-mode. - */ - private class TestMultiSetCollectorOperator extends RowCollectorTestOperator { + private boolean addLongHiveConfVariation(int hiveConfVariation, HiveConf hiveConf) { + + // Set defaults. + HiveConf.setBoolVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MINMAX_ENABLED, false); + HiveConf.setIntVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_OVERFLOW_REPEATED_THRESHOLD, -1); + + switch (hiveConfVariation) { + case 0: + break; + case 1: + HiveConf.setBoolVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MINMAX_ENABLED, true); + break; + case 2: + // Force generateHashMapResultLargeMultiValue to be used. + HiveConf.setIntVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_OVERFLOW_REPEATED_THRESHOLD, 5); + break; + default: + return false; + } + return true; + } + + private boolean goodTestVariation(MapJoinTestDescription testDesc) { + final int smallTableValueSize = testDesc.smallTableRetainValueColumnNums.length; + + switch (testDesc.vectorMapJoinVariation) { + case INNER: + return (smallTableValueSize > 0); + case INNER_BIG_ONLY: + case LEFT_SEMI: + return (smallTableValueSize == 0); + case OUTER: + return true; + case FULL_OUTER: + return true; + default: + throw new RuntimeException( + "Unexpected vectorMapJoinVariation " + testDesc.vectorMapJoinVariation); + } + + } + + @Test + public void testLong0() throws Exception { + long seed = 234882L; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong0( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { + doTestLong0( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + private boolean doTestLong0(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: long key, no value; Small Table: no key retained, date value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.longTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums 
= new int[] {}; + + smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.dateTypeInfo}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testLong0"); + + return false; + } + + @Test + public void testLong0_NoRegularKeys() throws Exception { + long seed = 234882L; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong0_NoRegularKeys( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { + doTestLong0_NoRegularKeys( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + private boolean doTestLong0_NoRegularKeys(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + smallTableGenerationParameters.setValueOption(ValueOption.NO_REGULAR_SMALL_KEYS); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: long key, no value; Small Table: no key retained, date value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.longTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {}; + + smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.dateTypeInfo}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "doTestLong0_NoRegularKeys"); + + return false; + } + + @Test + public void testLong1() throws Exception { + long seed = 234882L; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong1( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestLong1( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); 
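+ // The loop above ends once doTestLong1 returns true, i.e. when addLongHiveConfVariation has run out of HiveConf variations; a false return means the variation either ran or was skipped by goodTestVariation.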
+ } + + public boolean doTestLong1(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: int key, long value; Small Table: no key retained, string value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.intTypeInfo, + TypeInfoFactory.longTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {}; + + smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.stringTypeInfo}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testLong1"); + + return false; + } + + @Test + public void testLong2() throws Exception { + long seed = 3553; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong2( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestLong2( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong2(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: short key, no value; Small Table: key retained, timestamp value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.shortTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.timestampTypeInfo}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new 
MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testLong2"); + + return false; + } + + + @Test + public void testLong3() throws Exception { + long seed = 9934; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong3( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestLong3( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong3(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: int key, string value; Small Table: key retained, decimal value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.intTypeInfo, + TypeInfoFactory.stringTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = + new TypeInfo[] { + new DecimalTypeInfo(38, 18)}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testLong3"); + + return false; + } + + @Test + public void testLong3_NoRegularKeys() throws Exception { + long seed = 9934; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong3_NoRegularKeys( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestLong3_NoRegularKeys( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong3_NoRegularKeys(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums 
= null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + smallTableGenerationParameters.setValueOption(ValueOption.NO_REGULAR_SMALL_KEYS); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: int key, string value; Small Table: key retained, decimal value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.intTypeInfo, + TypeInfoFactory.stringTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = + new TypeInfo[] { + new DecimalTypeInfo(38, 18)}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "doTestLong3_NoRegularKeys"); + + return false; + } + + @Test + public void testLong4() throws Exception { + long seed = 3982; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong4( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestLong4( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong4(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: int key, no value; Small Table: no key retained, no value + // (exercise INNER_BIGONLY, LEFT_SEMI) + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.intTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {}; + + smallTableValueTypeInfos = new TypeInfo[] {}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testLong4"); + + return false; + } + + @Test + public void testLong5() throws Exception { + long seed = 3553; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + 
hiveConfVariationsDone = + doTestLong5( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestLong5( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong5(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + // Cause there to be no regular FULL OUTER MapJoin MATCHes so only non-match Small Table + // results. + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: long key, no value; Small Table: key retained, no value + // (exercise INNER_BIGONLY, LEFT_SEMI) + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.longTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = new TypeInfo[] {}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testLong5"); + + return false; + } + + @Test + public void testLong6() throws Exception { + long seed = 9384; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong6( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestLong6( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong6(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + // Cause there to be no regular FULL OUTER MapJoin MATCHes so only non-match Small Table + // results. 
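+ // (The default SmallTableGenerationParameters are used here; ValueOption.NO_REGULAR_SMALL_KEYS is only set in the *_NoRegularKeys tests.)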
+ SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: long key, timestamp value; Small Table: key retained, no value + // (exercise INNER_BIGONLY, LEFT_SEMI) + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.longTypeInfo, + TypeInfoFactory.timestampTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = new TypeInfo[] {}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testLong6"); + + return false; + } + + private boolean addNonLongHiveConfVariation(int hiveConfVariation, HiveConf hiveConf) { + + // Set defaults. + HiveConf.setIntVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_OVERFLOW_REPEATED_THRESHOLD, -1); + + switch (hiveConfVariation) { + case 0: + break; + case 1: + // Force generateHashMapResultLargeMultiValue to be used. + HiveConf.setIntVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_OVERFLOW_REPEATED_THRESHOLD, 5); + break; + default: + return false; + } + return true; + } + + @Test + public void testMultiKey0() throws Exception { + long seed = 28322; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestMultiKey0( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestMultiKey0( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestMultiKey0(long seed, int hiveConfVariation, VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + int rowCount = 10; + + HiveConf hiveConf = new HiveConf(); + + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; - private final RowTestObjectsMultiSet testRowMultiSet; + // Three key columns. 
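+ // (Key columns here are short and int: bigTableKeyColumnNums = {0, 1}.)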
+ bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.shortTypeInfo, + TypeInfoFactory.intTypeInfo}; + bigTableKeyColumnNums = new int[] {0, 1}; - public TestMultiSetCollectorOperator( - ObjectInspector[] outputObjectInspectors, - RowTestObjectsMultiSet testRowMultiSet) { - super(outputObjectInspectors); - this.testRowMultiSet = testRowMultiSet; - } + smallTableRetainKeyColumnNums = new int[] {0, 1}; - public RowTestObjectsMultiSet getTestRowMultiSet() { - return testRowMultiSet; - } + smallTableValueTypeInfos = new TypeInfo[] {}; - public void nextTestRow(RowTestObjects testRow) { - testRowMultiSet.add(testRow); - } + //---------------------------------------------------------------------------------------------- - @Override - public String getName() { - return TestMultiSetCollectorOperator.class.getSimpleName(); + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; } + + // Prepare data. Good for ANY implementation variation. + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testMultiKey0"); + + return false; + } + + @Test + public void testMultiKey1() throws Exception { + long seed = 87543; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestMultiKey1( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestMultiKey1( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); } - private class TestMultiSetVectorCollectorOperator extends RowVectorCollectorTestOperator { + public boolean doTestMultiKey1(long seed, int hiveConfVariation, VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { - private final RowTestObjectsMultiSet testRowMultiSet; + int rowCount = 10; + + HiveConf hiveConf = new HiveConf(); - public RowTestObjectsMultiSet getTestRowMultiSet() { - return testRowMultiSet; + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; } - public TestMultiSetVectorCollectorOperator(TypeInfo[] outputTypeInfos, - ObjectInspector[] outputObjectInspectors, RowTestObjectsMultiSet testRowMultiSet) - throws HiveException { - super(outputTypeInfos, outputObjectInspectors); - this.testRowMultiSet = testRowMultiSet; - } + TypeInfo[] bigTableTypeInfos = null; - public void nextTestRow(RowTestObjects testRow) { - testRowMultiSet.add(testRow); - } + int[] bigTableKeyColumnNums = null; - @Override - public String getName() { - return TestMultiSetVectorCollectorOperator.class.getSimpleName(); - } - } + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Three key columns. 
+ bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.timestampTypeInfo, + TypeInfoFactory.shortTypeInfo, + TypeInfoFactory.stringTypeInfo}; + bigTableKeyColumnNums = new int[] {0, 1, 2}; + + smallTableRetainKeyColumnNums = new int[] {0, 1, 2}; - private static class KeyConfig { - long seed; - PrimitiveTypeInfo primitiveTypeInfo; - KeyConfig(long seed, PrimitiveTypeInfo primitiveTypeInfo) { - this.seed = seed; - this.primitiveTypeInfo = primitiveTypeInfo; + smallTableValueTypeInfos = + new TypeInfo[] {new DecimalTypeInfo(38, 18)}; + + //---------------------------------------------------------------------------------------------- + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; } + + // Prepare data. Good for ANY implementation variation. + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testMultiKey1"); + + return false; } - private static KeyConfig[] longKeyConfigs = new KeyConfig[] { - new KeyConfig(234882L, TypeInfoFactory.longTypeInfo), - new KeyConfig(4600L, TypeInfoFactory.intTypeInfo), - new KeyConfig(98743L, TypeInfoFactory.shortTypeInfo)}; @Test - public void testLong() throws Exception { - for (KeyConfig longKeyConfig : longKeyConfigs) { + public void testMultiKey2() throws Exception { + long seed = 87543; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { - if (vectorMapJoinVariation == VectorMapJoinVariation.NONE){ - continue; + hiveConfVariationsDone = + doTestMultiKey2( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestMultiKey2( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); } - doTestLong(longKeyConfig.seed, longKeyConfig.primitiveTypeInfo, vectorMapJoinVariation); } - } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); } - public void doTestLong(long seed, TypeInfo numberTypeInfo, - VectorMapJoinVariation vectorMapJoinVariation) throws Exception { + public boolean doTestMultiKey2(long seed, int hiveConfVariation, VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { - int rowCount = 10000; + int rowCount = 10; HiveConf hiveConf = new HiveConf(); - String[] bigTableColumnNames = new String[] {"number1"}; - TypeInfo[] bigTableTypeInfos = - new TypeInfo[] { - TypeInfoFactory.longTypeInfo}; - int[] bigTableKeyColumnNums = new int[] {0}; + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } - String[] smallTableValueColumnNames = new String[] {"sv1", "sv2"}; - TypeInfo[] smallTableValueTypeInfos = - new TypeInfo[] {TypeInfoFactory.dateTypeInfo, TypeInfoFactory.stringTypeInfo}; + TypeInfo[] bigTableTypeInfos = null; - int[] bigTableRetainColumnNums = new int[] {0}; + int[] bigTableKeyColumnNums = null; - int[] smallTableRetainKeyColumnNums = new int[] {}; - int[] smallTableRetainValueColumnNums = new int[] {0, 1}; + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; SmallTableGenerationParameters smallTableGenerationParameters = 
new SmallTableGenerationParameters(); + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Three key columns. + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.longTypeInfo, + TypeInfoFactory.shortTypeInfo, + TypeInfoFactory.stringTypeInfo}; + bigTableKeyColumnNums = new int[] {0, 1, 2}; + + smallTableRetainKeyColumnNums = new int[] {0, 1, 2}; + + smallTableValueTypeInfos = + new TypeInfo[] { + TypeInfoFactory.stringTypeInfo}; + //---------------------------------------------------------------------------------------------- - MapJoinTestDescription testDesc = new MapJoinTestDescription( - hiveConf, vectorMapJoinVariation, - bigTableColumnNames, bigTableTypeInfos, - bigTableKeyColumnNums, - smallTableValueColumnNames, smallTableValueTypeInfos, - bigTableRetainColumnNums, - smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, - smallTableGenerationParameters); + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } // Prepare data. Good for ANY implementation variation. - MapJoinTestData testData = + testData = new MapJoinTestData(rowCount, testDesc, seed, seed * 10); - executeTest(testDesc, testData); + executeTest(testDesc, testData, "testMultiKey0"); + + return false; } @Test - public void testMultiKey() throws Exception { + public void testString0() throws Exception { long seed = 87543; - for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { - if (vectorMapJoinVariation == VectorMapJoinVariation.NONE){ - continue; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestString0( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestString0( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } } - doTestMultiKey(seed, vectorMapJoinVariation); - } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); } - public void doTestMultiKey(long seed, VectorMapJoinVariation vectorMapJoinVariation) throws Exception { + public boolean doTestString0(long seed, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { - int rowCount = 10000; + int rowCount = 10; HiveConf hiveConf = new HiveConf(); - String[] bigTableColumnNames = new String[] {"b1", "b2", "b3"}; - TypeInfo[] bigTableTypeInfos = + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // One plain STRING key column. 
+ bigTableTypeInfos = new TypeInfo[] { - TypeInfoFactory.intTypeInfo, - TypeInfoFactory.longTypeInfo, TypeInfoFactory.stringTypeInfo}; - int[] bigTableKeyColumnNums = new int[] {0, 1, 2}; + bigTableKeyColumnNums = new int[] {0}; - String[] smallTableValueColumnNames = new String[] {"sv1"}; - TypeInfo[] smallTableValueTypeInfos = - new TypeInfo[] {TypeInfoFactory.stringTypeInfo}; + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.dateTypeInfo, TypeInfoFactory.timestampTypeInfo}; + + //---------------------------------------------------------------------------------------------- + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + // Prepare data. Good for ANY implementation variation. + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testString0"); + + return false; + } + + @Test + public void testString1() throws Exception { + long seed = 3422; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestString1( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestString1( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestString1(long seed, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { - int[] bigTableRetainColumnNums = new int[] {0, 1, 2}; + int rowCount = 10; - int[] smallTableRetainKeyColumnNums = new int[] {}; - int[] smallTableRetainValueColumnNums = new int[] {0}; + HiveConf hiveConf = new HiveConf(); + + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; SmallTableGenerationParameters smallTableGenerationParameters = new SmallTableGenerationParameters(); + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // One BINARY key column. 
+ bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.binaryTypeInfo}; + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = + new TypeInfo[] { + TypeInfoFactory.shortTypeInfo, + TypeInfoFactory.floatTypeInfo, + new DecimalTypeInfo(38, 18)}; + + smallTableGenerationParameters = + new SmallTableGenerationParameters(); + //---------------------------------------------------------------------------------------------- - MapJoinTestDescription testDesc = new MapJoinTestDescription( - hiveConf, vectorMapJoinVariation, - bigTableColumnNames, bigTableTypeInfos, - bigTableKeyColumnNums, - smallTableValueColumnNames, smallTableValueTypeInfos, - bigTableRetainColumnNums, - smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, - smallTableGenerationParameters); + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } // Prepare data. Good for ANY implementation variation. - MapJoinTestData testData = + testData = new MapJoinTestData(rowCount, testDesc, seed, seed * 10); - executeTest(testDesc, testData); + executeTest(testDesc, testData, "testString1"); + + return false; } @Test - public void testString() throws Exception { - long seed = 87543; - for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { - if (vectorMapJoinVariation == VectorMapJoinVariation.NONE){ - continue; + public void testString2() throws Exception { + long seed = 7439; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestString2( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestString2( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } } - doTestString(seed, vectorMapJoinVariation); - } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); } - public void doTestString(long seed, VectorMapJoinVariation vectorMapJoinVariation) throws Exception { + public boolean doTestString2(long seed, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { - int rowCount = 10000; + int rowCount = 10; HiveConf hiveConf = new HiveConf(); - String[] bigTableColumnNames = new String[] {"b1"}; - TypeInfo[] bigTableTypeInfos = - new TypeInfo[] { - TypeInfoFactory.stringTypeInfo}; - int[] bigTableKeyColumnNums = new int[] {0}; + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } - String[] smallTableValueColumnNames = new String[] {"sv1", "sv2"}; - TypeInfo[] smallTableValueTypeInfos = - new TypeInfo[] {TypeInfoFactory.dateTypeInfo, TypeInfoFactory.timestampTypeInfo}; + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; - int[] bigTableRetainColumnNums = new int[] {0}; + TypeInfo[] smallTableValueTypeInfos = null; - int[] smallTableRetainKeyColumnNums = new int[] {}; - int[] smallTableRetainValueColumnNums = new int[] {0, 1}; + int[] smallTableRetainKeyColumnNums = null; SmallTableGenerationParameters 
smallTableGenerationParameters = new SmallTableGenerationParameters(); + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // One STRING key column; Small Table value: NONE (tests INNER_BIG_ONLY, LEFT_SEMI). + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.stringTypeInfo}; + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = new TypeInfo[] {}; + + smallTableGenerationParameters = + new SmallTableGenerationParameters(); + //---------------------------------------------------------------------------------------------- - MapJoinTestDescription testDesc = new MapJoinTestDescription( - hiveConf, vectorMapJoinVariation, - bigTableColumnNames, bigTableTypeInfos, - bigTableKeyColumnNums, - smallTableValueColumnNames, smallTableValueTypeInfos, - bigTableRetainColumnNums, - smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, - smallTableGenerationParameters); + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } // Prepare data. Good for ANY implementation variation. - MapJoinTestData testData = + testData = new MapJoinTestData(rowCount, testDesc, seed, seed * 10); - executeTest(testDesc, testData); + executeTest(testDesc, testData, "testString2"); + + return false; } private void addBigTableRetained(MapJoinTestDescription testDesc, Object[] bigTableRowObjects, @@ -357,16 +1482,36 @@ private void addBigTableRetained(MapJoinTestDescription testDesc, Object[] bigTa } } - private void addToOutput(MapJoinTestDescription testDesc, RowTestObjectsMultiSet expectedTestRowMultiSet, - Object[] outputObjects) { + private void addToOutput(MapJoinTestDescription testDesc, + RowTestObjectsMultiSet expectedTestRowMultiSet, Object[] outputObjects, + RowTestObjectsMultiSet.RowFlag rowFlag) { for (int c = 0; c < outputObjects.length; c++) { - PrimitiveObjectInspector primitiveObjInsp = ((PrimitiveObjectInspector) testDesc.outputObjectInspectors[c]); + PrimitiveObjectInspector primitiveObjInsp = + ((PrimitiveObjectInspector) testDesc.outputObjectInspectors[c]); Object outputObject = outputObjects[c]; outputObjects[c] = primitiveObjInsp.copyObject(outputObject); } - expectedTestRowMultiSet.add(new RowTestObjects(outputObjects)); + expectedTestRowMultiSet.add(new RowTestObjects(outputObjects), rowFlag); + } + + private String rowToCsvString(Object[] rowObjects) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < rowObjects.length; i++) { + if (sb.length() > 0) { + sb.append(","); + } + Object obj = rowObjects[i]; + if (obj == null) { + sb.append("\\N"); + } else { + sb.append(obj); + } + } + return sb.toString(); } + static int fake; + /* * Simulate the join by driving the test big table data by our test small table HashMap and * create the expected output as a multi-set of TestRow (i.e. TestRow and occurrence count). 
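Aside: the comment above describes the expected-result technique that createExpectedTestRowMultiSet implements — drive each big-table row against the small-table HashMap and collect every output row (key matches, big-table non-matches, and, for FULL OUTER, leftover small-table keys) into a multi-set. The following is a minimal standalone sketch of that idea using plain Java collections; the Long keys, String values, CSV row encoding, and class name are illustrative assumptions and are not part of the patch or of the RowTestObjects/RowTestObjectsMultiSet machinery.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class FullOuterExpectedResultSketch {
  public static void main(String[] args) {
    // Hypothetical small table: key -> list of values (duplicate keys become a value list).
    Map<Long, List<String>> smallTable = new HashMap<>();
    smallTable.put(1L, Arrays.asList("a", "b"));
    smallTable.put(2L, Arrays.asList("c"));
    smallTable.put(9L, Arrays.asList("z"));        // never referenced by the big table

    // Hypothetical big-table keys; a NULL key never matches (mirrors the hasAnyNulls check).
    List<Long> bigTable = Arrays.asList(1L, 3L, null, 1L);

    List<String> expected = new ArrayList<>();      // stands in for RowTestObjectsMultiSet
    Set<Long> matchedSmallKeys = new HashSet<>();   // stands in for key match tracking

    for (Long bigKey : bigTable) {
      String bigCol = (bigKey == null) ? "\\N" : bigKey.toString();
      if (bigKey != null && smallTable.containsKey(bigKey)) {
        matchedSmallKeys.add(bigKey);
        // One REGULAR row per small-table value: big column, retained small key, small value.
        for (String smallValue : smallTable.get(bigKey)) {
          expected.add(bigCol + "," + bigKey + "," + smallValue);
        }
      } else {
        // Big Table non-match: NULL small-table key and value columns (LEFT_OUTER row).
        expected.add(bigCol + ",\\N,\\N");
      }
    }

    // FULL OUTER only: small-table keys that never matched produce NULL big-table columns.
    for (Map.Entry<Long, List<String>> entry : smallTable.entrySet()) {
      if (!matchedSmallKeys.contains(entry.getKey())) {
        for (String smallValue : entry.getValue()) {
          expected.add("\\N," + entry.getKey() + "," + smallValue);   // FULL_OUTER row
        }
      }
    }

    expected.forEach(System.out::println);
  }
}

For the sample data above this prints two joined rows for each occurrence of key 1, one LEFT_OUTER row each for key 3 and the NULL key, and one FULL_OUTER row each for the unmatched small-table keys 2 and 9.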
@@ -377,7 +1522,7 @@ private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescript RowTestObjectsMultiSet expectedTestRowMultiSet = new RowTestObjectsMultiSet(); VectorExtractRow vectorExtractRow = new VectorExtractRow(); - vectorExtractRow.init(testDesc.bigTableKeyTypeInfos); + vectorExtractRow.init(testDesc.bigTableTypeInfos); final int bigTableColumnCount = testDesc.bigTableTypeInfos.length; Object[] bigTableRowObjects = new Object[bigTableColumnCount]; @@ -397,20 +1542,26 @@ private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescript vectorExtractRow.extractRow(testData.bigTableBatch, r, bigTableRowObjects); // Form key object array + boolean hasAnyNulls = false; // NULLs may be present in {FULL|LEFT|RIGHT} OUTER joins. for (int k = 0; k < bigTableKeyColumnCount; k++) { int keyColumnNum = testDesc.bigTableKeyColumnNums[k]; - bigTableKeyObjects[k] = bigTableRowObjects[keyColumnNum]; + Object keyObject = bigTableRowObjects[keyColumnNum]; + if (keyObject == null) { + hasAnyNulls = true; + } + bigTableKeyObjects[k] = keyObject; bigTableKeyObjects[k] = ((PrimitiveObjectInspector) testDesc.bigTableObjectInspectors[keyColumnNum]).copyObject(bigTableKeyObjects[k]); } RowTestObjects testKey = new RowTestObjects(bigTableKeyObjects); - if (testData.smallTableKeyHashMap.containsKey(testKey)) { + if (testData.smallTableKeyHashMap.containsKey(testKey) && !hasAnyNulls) { int smallTableKeyIndex = testData.smallTableKeyHashMap.get(testKey); switch (testDesc.vectorMapJoinVariation) { case INNER: case OUTER: + case FULL_OUTER: { // One row per value. ArrayList valueList = testData.smallTableValues.get(smallTableKeyIndex); @@ -420,36 +1571,46 @@ private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescript addBigTableRetained(testDesc, bigTableRowObjects, outputObjects); + int outputColumnNum = testDesc.bigTableRetainColumnNums.length; + + final int smallTableRetainKeyColumnNumsLength = + testDesc.smallTableRetainKeyColumnNums.length; + for (int o = 0; o < smallTableRetainKeyColumnNumsLength; o++) { + outputObjects[outputColumnNum++] = + bigTableKeyObjects[testDesc.smallTableRetainKeyColumnNums[o]]; + } + Object[] valueRow = valueList.get(v).getRow(); - final int bigTableRetainColumnNumsLength = testDesc.bigTableRetainColumnNums.length; - final int smallTableRetainValueColumnNumsLength = testDesc.smallTableRetainValueColumnNums.length; + final int smallTableRetainValueColumnNumsLength = + testDesc.smallTableRetainValueColumnNums.length; for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) { - outputObjects[bigTableRetainColumnNumsLength + o] = valueRow[testDesc.smallTableRetainValueColumnNums[o]]; + outputObjects[outputColumnNum++] = + valueRow[testDesc.smallTableRetainValueColumnNums[o]]; } - addToOutput(testDesc, expectedTestRowMultiSet, outputObjects); + addToOutput(testDesc, expectedTestRowMultiSet, outputObjects, + RowTestObjectsMultiSet.RowFlag.REGULAR); } } break; case INNER_BIG_ONLY: - { - // Value count rows. - final int valueCount = testData.smallTableValueCounts.get(smallTableKeyIndex); - for (int v = 0; v < valueCount; v++) { - Object[] outputObjects = new Object[testDesc.outputColumnNames.length]; - - addBigTableRetained(testDesc, bigTableRowObjects, outputObjects); - addToOutput(testDesc, expectedTestRowMultiSet, outputObjects); - } - } - break; case LEFT_SEMI: { - // One row (existence). 
Object[] outputObjects = new Object[testDesc.outputColumnNames.length]; addBigTableRetained(testDesc, bigTableRowObjects, outputObjects); - addToOutput(testDesc, expectedTestRowMultiSet, outputObjects); + + int outputColumnNum = testDesc.bigTableRetainColumnNums.length; + + final int smallTableRetainKeyColumnNumsLength = + testDesc.smallTableRetainKeyColumnNums.length; + for (int o = 0; o < smallTableRetainKeyColumnNumsLength; o++) { + outputObjects[outputColumnNum++] = + bigTableKeyObjects[testDesc.smallTableRetainKeyColumnNums[o]]; + } + + addToOutput(testDesc, expectedTestRowMultiSet, outputObjects, + RowTestObjectsMultiSet.RowFlag.REGULAR); } break; default: @@ -458,9 +1619,10 @@ private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescript } else { - // No match. + // Big Table non-match. - if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER) { + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER || + testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { // We need to add a non-match row with nulls for small table values. @@ -468,14 +1630,74 @@ private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescript addBigTableRetained(testDesc, bigTableRowObjects, outputObjects); - final int bigTableRetainColumnNumsLength = testDesc.bigTableRetainColumnNums.length; - final int smallTableRetainValueColumnNumsLength = testDesc.smallTableRetainValueColumnNums.length; + int outputColumnNum = testDesc.bigTableRetainColumnNums.length; + + final int smallTableRetainKeyColumnNumsLength = + testDesc.smallTableRetainKeyColumnNums.length; + for (int o = 0; o < smallTableRetainKeyColumnNumsLength; o++) { + outputObjects[outputColumnNum++] = null; + } + + final int smallTableRetainValueColumnNumsLength = + testDesc.smallTableRetainValueColumnNums.length; for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) { - outputObjects[bigTableRetainColumnNumsLength + o] = null; + outputObjects[outputColumnNum++] = null; } - addToOutput(testDesc, expectedTestRowMultiSet, outputObjects); + addToOutput(testDesc, expectedTestRowMultiSet, outputObjects, + RowTestObjectsMultiSet.RowFlag.LEFT_OUTER); + } + } + } + } + + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { + + System.out.println("*BENCHMARK* ----------------------------------------------------------------------"); + System.out.println("*BENCHMARK* FULL OUTER non-match key count " + + testData.fullOuterAdditionalSmallTableKeys.size()); + + // Fill in non-match Small Table key results. + for (RowTestObjects smallTableKey : testData.fullOuterAdditionalSmallTableKeys) { + + System.out.println( + "*BENCHMARK* fullOuterAdditionalSmallTableKey " + smallTableKey.toString()); + + int smallTableKeyIndex = testData.smallTableKeyHashMap.get(smallTableKey); + + // One row per value. + ArrayList valueList = testData.smallTableValues.get(smallTableKeyIndex); + final int valueCount = valueList.size(); + for (int v = 0; v < valueCount; v++) { + Object[] outputObjects = new Object[testDesc.outputColumnNames.length]; + + // Non-match Small Table keys produce NULL Big Table columns. + final int bigTableRetainColumnNumsLength = testDesc.bigTableRetainColumnNums.length; + for (int o = 0; o < bigTableRetainColumnNumsLength; o++) { + outputObjects[o] = null; + } + + int outputColumnNum = testDesc.bigTableRetainColumnNums.length; + + // The output result may include 0, 1, or more small key columns... 
+ Object[] smallKeyObjects = smallTableKey.getRow(); + final int smallTableRetainKeyColumnNumsLength = + testDesc.smallTableRetainKeyColumnNums.length; + for (int o = 0; o < smallTableRetainKeyColumnNumsLength; o++) { + outputObjects[outputColumnNum++] = + smallKeyObjects[testDesc.smallTableRetainKeyColumnNums[o]]; } + + Object[] valueRow = valueList.get(v).getRow(); + final int smallTableRetainValueColumnNumsLength = + testDesc.smallTableRetainValueColumnNums.length; + for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) { + outputObjects[outputColumnNum++] = + valueRow[testDesc.smallTableRetainValueColumnNums[o]]; + } + + addToOutput(testDesc, expectedTestRowMultiSet, outputObjects, + RowTestObjectsMultiSet.RowFlag.FULL_OUTER); } } } @@ -483,67 +1705,336 @@ private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescript return expectedTestRowMultiSet; } - private void executeTest(MapJoinTestDescription testDesc, MapJoinTestData testData) throws Exception { + private void generateBigAndSmallTableRowLogLines(MapJoinTestDescription testDesc, + MapJoinTestData testData) throws HiveException { + + // Generate Big Table rows log lines... + VectorExtractRow vectorExtractRow = new VectorExtractRow(); + vectorExtractRow.init(testDesc.bigTableTypeInfos); + + final int bigTableColumnCount = testDesc.bigTableTypeInfos.length; + Object[] bigTableRowObjects = new Object[bigTableColumnCount]; + + VectorBatchGenerateStream bigTableBatchStream = testData.getBigTableBatchStream(); + VectorizedRowBatch batch = testData.getBigTableBatch(); + bigTableBatchStream.reset(); + while (bigTableBatchStream.isNext()) { + batch.reset(); + bigTableBatchStream.fillNext(batch); + + final int size = testData.bigTableBatch.size; + for (int r = 0; r < size; r++) { + vectorExtractRow.extractRow(testData.bigTableBatch, r, bigTableRowObjects); + + System.out.println("*BIG TABLE* " + rowToCsvString(bigTableRowObjects)); + } + } + + // Generate Small Table rows log lines... + final int keyKeyColumnNumsLength = + testDesc.bigTableKeyColumnNums.length; + final int smallTableRetainValueLength = + testDesc.smallTableRetainValueColumnNums.length; + final int smallTableLength = keyKeyColumnNumsLength + smallTableRetainValueLength; + for (Entry entry : testData.smallTableKeyHashMap.entrySet()) { + if (smallTableRetainValueLength == 0) { + Object[] smallTableRowObjects = entry.getKey().getRow(); + System.out.println("*SMALL TABLE* " + rowToCsvString(smallTableRowObjects)); + } else { + Integer valueIndex = entry.getValue(); + ArrayList valueList = testData.smallTableValues.get(valueIndex); + final int valueCount = valueList.size(); + for (int v = 0; v < valueCount; v++) { + Object[] smallTableRowObjects = new Object[smallTableLength]; + System.arraycopy(entry.getKey().getRow(), 0, smallTableRowObjects, 0, keyKeyColumnNumsLength); + int outputColumnNum = keyKeyColumnNumsLength; + Object[] valueRow = valueList.get(v).getRow(); + for (int o = 0; o < smallTableRetainValueLength; o++) { + smallTableRowObjects[outputColumnNum++] = + valueRow[testDesc.smallTableRetainValueColumnNums[o]]; + } + System.out.println("*SMALL TABLE* " + rowToCsvString(smallTableRowObjects)); + } + } + } + } + + private void executeTest(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + + // So stack trace is self-explanatory. 
+ switch (testDesc.vectorMapJoinVariation) { + case INNER: + executeTestInner(testDesc, testData, title); + break; + case INNER_BIG_ONLY: + executeTestInnerBigOnly(testDesc, testData, title); + break; + case LEFT_SEMI: + executeTestLeftSemi(testDesc, testData, title); + break; + case OUTER: + executeTestOuter(testDesc, testData, title); + break; + case FULL_OUTER: + executeTestFullOuter(testDesc, testData, title); + break; + default: + throw new RuntimeException("Unexpected Vector MapJoin variation " + + testDesc.vectorMapJoinVariation); + } + } + + private void executeTestInner(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + doExecuteTest(testDesc, testData, title); + } + + private void executeTestInnerBigOnly(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + doExecuteTest(testDesc, testData, title); + } + + private void executeTestLeftSemi(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + doExecuteTest(testDesc, testData, title); + } + + private void executeTestOuter(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + doExecuteTest(testDesc, testData, title); + } + + private void executeTestFullOuter(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + doExecuteTest(testDesc, testData, title); + } + + private void doExecuteTest(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { RowTestObjectsMultiSet expectedTestRowMultiSet = createExpectedTestRowMultiSet(testDesc, testData); - // UNDONE: Inner count - System.out.println("*BENCHMARK* expectedTestRowMultiSet rowCount " + expectedTestRowMultiSet.getRowCount() + - " totalCount " + expectedTestRowMultiSet.getTotalCount()); + generateBigAndSmallTableRowLogLines(testDesc, testData); + + System.out.println("*BENCHMARK* expectedTestRowMultiSet " + + " totalKeyCount " + expectedTestRowMultiSet.getTotalKeyCount() + + " totalValueCount " + expectedTestRowMultiSet.getTotalValueCount()); // Execute all implementation variations. for (MapJoinTestImplementation mapJoinImplementation : MapJoinTestImplementation.values()) { - executeTestImplementation(mapJoinImplementation, testDesc, testData, - expectedTestRowMultiSet); + + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER && + mapJoinImplementation == MapJoinTestImplementation.ROW_MODE_HASH_MAP) { + + // Key match tracking not supported in plain Java HashMap. 
+ continue; + } + switch (mapJoinImplementation) { + case ROW_MODE_HASH_MAP: + executeRowModeHashMap( + testDesc, testData, + expectedTestRowMultiSet, + title); + break; + case ROW_MODE_OPTIMIZED: + executeRowModeOptimized( + testDesc, testData, + expectedTestRowMultiSet, + title); + break; + case VECTOR_PASS_THROUGH: + executeVectorPassThrough( + testDesc, testData, + expectedTestRowMultiSet, + title); + break; + case NATIVE_VECTOR_OPTIMIZED: + executeNativeVectorOptimized( + testDesc, testData, + expectedTestRowMultiSet, + title); + break; + case NATIVE_VECTOR_FAST: + executeNativeVectorFast( + testDesc, testData, + expectedTestRowMultiSet, + title); + break; + default: + throw new RuntimeException( + "Unexpected vector map join test variation"); + } } } - private boolean isVectorOutput(MapJoinTestImplementation mapJoinImplementation) { - return - (mapJoinImplementation != MapJoinTestImplementation.ROW_MODE_HASH_MAP && - mapJoinImplementation != MapJoinTestImplementation.ROW_MODE_OPTIMIZED); + private void executeRowModeHashMap( + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) + throws Exception { + executeTestImplementation( + MapJoinTestImplementation.ROW_MODE_HASH_MAP, + testDesc, testData, + expectedTestRowMultiSet, + title); + } + + private void executeRowModeOptimized( + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) + throws Exception { + executeTestImplementation( + MapJoinTestImplementation.ROW_MODE_OPTIMIZED, + testDesc, testData, + expectedTestRowMultiSet, + title); + } + + private void executeVectorPassThrough( + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) + throws Exception { + executeTestImplementation( + MapJoinTestImplementation.VECTOR_PASS_THROUGH, + testDesc, testData, + expectedTestRowMultiSet, + title); + } + + private void executeNativeVectorOptimized( + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) + throws Exception { + executeTestImplementation( + MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED, + testDesc, testData, + expectedTestRowMultiSet, + title); + } + + private void executeNativeVectorFast( + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) + throws Exception { + executeTestImplementation( + MapJoinTestImplementation.NATIVE_VECTOR_FAST, + testDesc, testData, + expectedTestRowMultiSet, + title); } private void executeTestImplementation( MapJoinTestImplementation mapJoinImplementation, - MapJoinTestDescription testDesc, MapJoinTestData testData, RowTestObjectsMultiSet expectedTestRowMultiSet) + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) throws Exception { - System.out.println("*BENCHMARK* Starting " + mapJoinImplementation + " test"); + System.out.println("*BENCHMARK* Starting implementation " + mapJoinImplementation + + " variation " + testDesc.vectorMapJoinVariation + + " title " + title); // UNDONE: Parameterize for implementation variation? 
MapJoinDesc mapJoinDesc = MapJoinTestConfig.createMapJoinDesc(testDesc); - final boolean isVectorOutput = isVectorOutput(mapJoinImplementation); + final boolean isVectorOutput = MapJoinTestConfig.isVectorOutput(mapJoinImplementation); RowTestObjectsMultiSet outputTestRowMultiSet = new RowTestObjectsMultiSet(); - Operator testCollectorOperator = - (!isVectorOutput ? - new TestMultiSetCollectorOperator( - testDesc.outputObjectInspectors, outputTestRowMultiSet) : - new TestMultiSetVectorCollectorOperator( - testDesc.outputTypeInfos, testDesc.outputObjectInspectors, outputTestRowMultiSet)); - - MapJoinOperator operator = + CreateMapJoinResult result = MapJoinTestConfig.createMapJoinImplementation( - mapJoinImplementation, testDesc, testCollectorOperator, testData, mapJoinDesc); + mapJoinImplementation, testDesc, testData, mapJoinDesc); + MapJoinOperator mapJoinOperator = result.mapJoinOperator; + MapJoinTableContainer mapJoinTableContainer = result.mapJoinTableContainer; + MapJoinTableContainerSerDe mapJoinTableContainerSerDe = result.mapJoinTableContainerSerDe; + + CountCollectorTestOperator testCollectorOperator; + if (!isVectorOutput) { + testCollectorOperator = + new TestMultiSetCollectorOperator( + testDesc.outputObjectInspectors, outputTestRowMultiSet); + } else { + VectorizationContext vOutContext = + ((VectorizationContextRegion) mapJoinOperator).getOutputVectorizationContext(); + testCollectorOperator = + new TestMultiSetVectorCollectorOperator( + ArrayUtils.toPrimitive(vOutContext.getProjectedColumns().toArray(new Integer[0])), + testDesc.outputTypeInfos, testDesc.outputObjectInspectors, outputTestRowMultiSet); + } + + MapJoinTestConfig.connectOperators(mapJoinOperator, testCollectorOperator); + + CountCollectorTestOperator interceptTestCollectorOperator = null; + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER && + !mapJoinDesc.isDynamicPartitionHashJoin()) { + + if (mapJoinImplementation == MapJoinTestImplementation.ROW_MODE_HASH_MAP) { + + // Not supported. + return; + } + + // Wire in FULL OUTER Intercept. + interceptTestCollectorOperator = + MapJoinTestConfig.addFullOuterIntercept( + mapJoinImplementation, testDesc, outputTestRowMultiSet, testData, + mapJoinOperator, mapJoinTableContainer, mapJoinTableContainerSerDe); + } else { + + // Invoke initializeOp methods. + mapJoinOperator.initialize( + testDesc.hiveConf, testDesc.inputObjectInspectors); + + // Fixup the mapJoinTables. 
+ mapJoinOperator.setTestMapJoinTableContainer( + 1, mapJoinTableContainer, mapJoinTableContainerSerDe); + } if (!isVectorOutput) { - MapJoinTestData.driveBigTableData(testDesc, testData, operator); + MapJoinTestData.driveBigTableData(testDesc, testData, mapJoinOperator); } else { - MapJoinTestData.driveVectorBigTableData(testDesc, testData, operator); + MapJoinTestData.driveVectorBigTableData(testDesc, testData, mapJoinOperator); + } + + if (!testCollectorOperator.getIsClosed()) { + Assert.fail("collector operator not closed"); + } + if (testCollectorOperator.getIsAborted()) { + Assert.fail("collector operator aborted"); + } + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER && + !mapJoinDesc.isDynamicPartitionHashJoin()) { + if (!interceptTestCollectorOperator.getIsClosed()) { + Assert.fail("intercept collector operator not closed"); + } + if (interceptTestCollectorOperator.getIsAborted()) { + Assert.fail("intercept collector operator aborted"); + } } System.out.println("*BENCHMARK* executeTestImplementation row count " + - ((CountCollectorTestOperator) testCollectorOperator).getRowCount()); + testCollectorOperator.getRowCount()); // Verify the output! - if (!expectedTestRowMultiSet.verify(outputTestRowMultiSet)) { - System.out.println("*BENCHMARK* verify failed for " + mapJoinImplementation); + String option = ""; + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { + option = " mapJoinPlanVariation " + testDesc.mapJoinPlanVariation.name(); + } + if (!expectedTestRowMultiSet.verify(outputTestRowMultiSet, "expected", "actual")) { + System.out.println("*BENCHMARK* " + title + " verify failed" + + " for implementation " + mapJoinImplementation + + " variation " + testDesc.vectorMapJoinVariation + option); + expectedTestRowMultiSet.displayDifferences(outputTestRowMultiSet, "expected", "actual"); } else { - System.out.println("*BENCHMARK* verify succeeded for " + mapJoinImplementation); + System.out.println("*BENCHMARK* " + title + " verify succeeded " + + " for implementation " + mapJoinImplementation + + " variation " + testDesc.vectorMapJoinVariation + option); } } } \ No newline at end of file diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java index 09dcb83..3ce061d 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java @@ -30,11 +30,15 @@ import junit.framework.TestCase; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.WritableComparator; @@ -197,6 +201,20 @@ public long getKey(int index) { return array[index].getValues(); } + private 
void verifyOne(VectorMapJoinFastLongHashMap map, int index, MatchTracker matchTracker) { + FastLongHashMapElement element = array[index]; + long longKey = element.getKey(); + List values = element.getValues(); + + VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); + JoinUtil.JoinResult joinResult = map.lookup(longKey, hashMapResult, matchTracker); + if (joinResult != JoinUtil.JoinResult.MATCH) { + assertTrue(false); + } + + verifyHashMapValues(hashMapResult, values); + } + public void verify(VectorMapJoinFastLongHashMap map) { int mapSize = map.size(); if (mapSize != count) { @@ -204,18 +222,77 @@ public void verify(VectorMapJoinFastLongHashMap map) { } for (int index = 0; index < count; index++) { + verifyOne(map, index, null); + } + } + + private int findKeyInArray(long searchLong) { + + // Brute force search. + for (int index = 0; index < count; index++) { FastLongHashMapElement element = array[index]; - long key = element.getKey(); - List values = element.getValues(); + long longKey = element.getKey(); + if (longKey == searchLong) { + return index; + } + } + return -1; + } - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, hashMapResult); - if (joinResult != JoinUtil.JoinResult.MATCH) { - assertTrue(false); + // We assume there have been no reads/lookups before this call. + // And, keys are *UNIQUE*. + public void verifyNonMatched(VectorMapJoinFastLongHashMap map, Random random) + throws HiveException { + int mapSize = map.size(); + if (mapSize != count) { + TestCase.fail("map.size() does not match expected count"); + } + + MatchTracker matchTracker = map.createMatchTracker(); + boolean nonMatched[] = new boolean[mapSize]; + int nonMatchedCount = 0; + for (int index = 0; index < count; index++) { + nonMatched[index] = random.nextBoolean(); + if (!nonMatched[index]) { + verifyOne(map, index, matchTracker); + } else { + nonMatchedCount++; } + } + + boolean returnedNonMatched[] = new boolean[mapSize]; + int returnedNonMatchedCount = 0; + + VectorMapJoinNonMatchedIterator nonMatchedIterator = + map.createNonMatchedIterator(matchTracker); + nonMatchedIterator.init(); + while (nonMatchedIterator.findNextNonMatched()) { + boolean isNull = !nonMatchedIterator.readNonMatchedLongKey(); + if (isNull) { + TestCase.fail("NULL key found in expected keys"); + } + long longKey = nonMatchedIterator.getNonMatchedLongKey(); + int index = findKeyInArray(longKey); + if (index == -1) { + TestCase.fail("non-matched key not found in expected keys"); + } + if (!nonMatched[index]) { + TestCase.fail("non-matched key not one of the expected non-matched keys"); + } + if (returnedNonMatched[index]) { + TestCase.fail("non-matched key already returned"); + } + returnedNonMatched[index] = true; + returnedNonMatchedCount++; + VectorMapJoinHashMapResult hashMapResult = nonMatchedIterator.getNonMatchedHashMapResult(); + FastLongHashMapElement element = array[index]; + List values = element.getValues(); verifyHashMapValues(hashMapResult, values); } + if (nonMatchedCount != returnedNonMatchedCount) { + TestCase.fail("non-matched key count mismatch"); + } } } @@ -247,6 +324,11 @@ public int getValueCount() { public void addValue(byte[] value) { values.add(value); } + + @Override + public String toString() { + return "Key length " + key.length + ", value count " + values.size(); + } } /* @@ -310,6 +392,21 @@ public void add(byte[] key, byte[] value) { return array[index].getValues(); } + private void 
verifyOne(VectorMapJoinFastBytesHashMap map, int index, + MatchTracker matchTracker) { + FastBytesHashMapElement element = array[index]; + byte[] key = element.getKey(); + List values = element.getValues(); + + VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); + JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult, matchTracker); + if (joinResult != JoinUtil.JoinResult.MATCH) { + assertTrue(false); + } + + verifyHashMapValues(hashMapResult, values); + } + public void verify(VectorMapJoinFastBytesHashMap map) { int mapSize = map.size(); if (mapSize != count) { @@ -317,18 +414,82 @@ public void verify(VectorMapJoinFastBytesHashMap map) { } for (int index = 0; index < count; index++) { + verifyOne(map, index, null); + } + } + + private int findKeyInArray(byte[] searchKeyBytes, int searchKeyOffset, int searchKeyLength) { + + // Brute force search. + for (int index = 0; index < count; index++) { FastBytesHashMapElement element = array[index]; - byte[] key = element.getKey(); - List values = element.getValues(); + byte[] keyBytes = element.getKey(); + if (keyBytes.length == searchKeyLength && + StringExpr.equal( + keyBytes, 0, keyBytes.length, + searchKeyBytes, searchKeyOffset, searchKeyLength)) { + return index; + } + } + return -1; + } - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult); - if (joinResult != JoinUtil.JoinResult.MATCH) { - assertTrue(false); + // We assume there have been no reads/lookups before this call. + // And, keys are *UNIQUE*. + public void verifyNonMatched(VectorMapJoinFastBytesHashMap map, Random random) + throws HiveException { + int mapSize = map.size(); + if (mapSize != count) { + TestCase.fail("map.size() does not match expected count"); + } + + MatchTracker matchTracker = map.createMatchTracker(); + boolean nonMatched[] = new boolean[mapSize]; + int nonMatchedCount = 0; + for (int index = 0; index < count; index++) { + nonMatched[index] = random.nextBoolean(); + if (!nonMatched[index]) { + verifyOne(map, index, matchTracker); + } else { + nonMatchedCount++; } + } + + boolean returnedNonMatched[] = new boolean[mapSize]; + int returnedNonMatchedCount = 0; + VectorMapJoinNonMatchedIterator nonMatchedIterator = + map.createNonMatchedIterator(matchTracker); + nonMatchedIterator.init(); + + while (nonMatchedIterator.findNextNonMatched()) { + boolean isNull = !nonMatchedIterator.readNonMatchedBytesKey(); + if (isNull) { + TestCase.fail("NULL key found in expected keys"); + } + byte[] keyBytes = nonMatchedIterator.getNonMatchedBytes(); + int keyOffset = nonMatchedIterator.getNonMatchedBytesOffset(); + int keyLength = nonMatchedIterator.getNonMatchedBytesLength(); + int index = findKeyInArray(keyBytes, keyOffset, keyLength); + if (index == -1) { + TestCase.fail("non-matched key not found in expected keys"); + } + if (!nonMatched[index]) { + TestCase.fail("non-matched key not one of the expected non-matched keys"); + } + if (returnedNonMatched[index]) { + TestCase.fail("non-matched key already returned"); + } + returnedNonMatched[index] = true; + returnedNonMatchedCount++; + VectorMapJoinHashMapResult hashMapResult = nonMatchedIterator.getNonMatchedHashMapResult(); + FastBytesHashMapElement element = array[index]; + List values = element.getValues(); verifyHashMapValues(hashMapResult, values); } + if (nonMatchedCount != returnedNonMatchedCount) { + TestCase.fail("non-matched key count mismatch"); + } } } diff --git
ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java new file mode 100644 index 0000000..6833553 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java @@ -0,0 +1,171 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastBytesHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.junit.Test; + +/* + * An multi-key value hash map optimized for vector map join. + * + * The key is uninterpreted bytes. + */ +public class TestVectorMapJoinFastBytesHashMapNonMatched extends CommonFastHashTable { + + @Test + public void testOneKey() throws Exception { + random = new Random(82733); + + VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap( + false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + + // Second value. + value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + map.testPutRow(key, value); + verifyTable.add(key, value); + + // Third value. + value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + map.testPutRow(key, value); + verifyTable.add(key, value); + + verifyTable.verifyNonMatched(map, random); + } + + @Test + public void testMultipleKeysSingleValue() throws Exception { + random = new Random(29383); + + VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap( + false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + int keyCount = 100 + random.nextInt(1000); + for (int i = 0; i < keyCount; i++) { + byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test. 
+ break; + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + } + + verifyTable.verifyNonMatched(map, random); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastMultiKeyHashMap map, VerifyFastBytesHashMap verifyTable) + throws HiveException, IOException { + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable, MAX_KEY_LENGTH, -1); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastMultiKeyHashMap map, VerifyFastBytesHashMap verifyTable, + int maxKeyLength, int fixedValueLength) + throws HiveException, IOException { + for (int i = 0; i < keyCount; i++) { + byte[] value; + if (fixedValueLength == -1) { + value = new byte[generateLargeCount() - 1]; + } else { + value = new byte[fixedValueLength]; + } + random.nextBytes(value); + + // Add a new key or add a value to an existing key? + if (random.nextBoolean() || verifyTable.getCount() == 0) { + byte[] key; + while (true) { + key = new byte[random.nextInt(maxKeyLength)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key, value); + verifyTable.add(key, value); + } else { + byte[] randomExistingKey = verifyTable.addRandomExisting(value, random); + map.testPutRow(randomExistingKey, value); + } + } + + verifyTable.verifyNonMatched(map, random); + } + + @Test + public void testMultipleKeysMultipleValue() throws Exception { + random = new Random(9332); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap( + false,LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + int keyCount = 100; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } + + @Test + public void testReallyBig() throws Exception { + random = new Random(42662); + + // Use a large capacity that doesn't require expansion, yet. 
+ VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap( + false,LARGE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + int keyCount = 100; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java index cbd77d1..fb8be91 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java @@ -37,7 +37,7 @@ public void testOneKey() throws Exception { VectorMapJoinFastMultiKeyHashSet map = new VectorMapJoinFastMultiKeyHashSet( - false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -65,7 +65,7 @@ public void testMultipleKeysSingleValue() throws Exception { VectorMapJoinFastMultiKeyHashSet map = new VectorMapJoinFastMultiKeyHashSet( - false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -91,7 +91,7 @@ public void testGetNonExistent() throws Exception { VectorMapJoinFastMultiKeyHashSet map = new VectorMapJoinFastMultiKeyHashSet( - false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -125,7 +125,7 @@ public void testFullMap() throws Exception { // Make sure the map does not expand; should be able to find space. VectorMapJoinFastMultiKeyHashSet map = - new VectorMapJoinFastMultiKeyHashSet(false,CAPACITY, 1f, WB_SIZE, -1); + new VectorMapJoinFastMultiKeyHashSet(CAPACITY, 1f, WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -167,7 +167,7 @@ public void testExpand() throws Exception { // Start with capacity 1; make sure we expand on every put. VectorMapJoinFastMultiKeyHashSet map = - new VectorMapJoinFastMultiKeyHashSet(false,1, 0.0000001f, WB_SIZE, -1); + new VectorMapJoinFastMultiKeyHashSet(1, 0.0000001f, WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -227,7 +227,7 @@ public void testMultipleKeysMultipleValue() throws Exception { // Use a large capacity that doesn't require expansion, yet. VectorMapJoinFastMultiKeyHashSet map = new VectorMapJoinFastMultiKeyHashSet( - false,LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); + LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -242,7 +242,7 @@ public void testLargeAndExpand() throws Exception { // Use a large capacity that doesn't require expansion, yet. 
VectorMapJoinFastMultiKeyHashSet map = new VectorMapJoinFastMultiKeyHashSet( - false,MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); + MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java new file mode 100644 index 0000000..8e53501 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java @@ -0,0 +1,198 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastLongHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastLongHashMap; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.junit.Test; + +import static org.junit.Assert.*; + +public class TestVectorMapJoinFastLongHashMapNonMatched extends CommonFastHashTable { + + @Test + public void testOneKey() throws Exception { + random = new Random(33221); + + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + long key = random.nextLong(); + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + + // Second value. + value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + map.testPutRow(key, value); + verifyTable.add(key, value); + + // Third value. 
+ value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + map.testPutRow(key, value); + verifyTable.add(key, value); + + verifyTable.verifyNonMatched(map, random); + } + + @Test + public void testMultipleKeysSingleValue() throws Exception { + random = new Random(900); + + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + int keyCount = 100 + random.nextInt(1000); + for (int i = 0; i < keyCount; i++) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + } + + verifyTable.verifyNonMatched(map, random); + } + + @Test + public void testExpand() throws Exception { + random = new Random(5227); + + // Start with capacity 1; make sure we expand on every put. + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE, -1); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + for (int i = 0; i < 18; ++i) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + } + + verifyTable.verifyNonMatched(map, random); + // assertEquals(1 << 18, map.getCapacity()); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastLongHashMap map, VerifyFastLongHashMap verifyTable) + throws HiveException, IOException { + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable, -1); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastLongHashMap map, VerifyFastLongHashMap verifyTable, int fixedValueLength) + throws HiveException, IOException { + for (int i = 0; i < keyCount; i++) { + byte[] value; + if (fixedValueLength == -1) { + value = new byte[generateLargeCount() - 1]; + } else { + value = new byte[fixedValueLength]; + } + random.nextBytes(value); + + // Add a new key or add a value to an existing key? + if (random.nextBoolean() || verifyTable.getCount() == 0) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key, value); + verifyTable.add(key, value); + } else { + long randomExistingKey = verifyTable.addRandomExisting(value, random); + map.testPutRow(randomExistingKey, value); + } + } + + verifyTable.verifyNonMatched(map, random); + } + + @Test + public void testMultipleKeysMultipleValue() throws Exception { + random = new Random(8); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + int keyCount = 100; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } + + @Test + public void testLargeAndExpand() throws Exception { + random = new Random(20); + + // Use a large capacity that doesn't require expansion, yet. 
+ VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + int keyCount = 100; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java index bbb5da0..f64d180 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java @@ -39,7 +39,7 @@ public void testOneKey() throws Exception { VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); @@ -66,7 +66,7 @@ public void testMultipleKeysSingleValue() throws Exception { VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); @@ -94,7 +94,7 @@ public void testGetNonExistent() throws Exception { VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); @@ -126,7 +126,7 @@ public void testFullMap() throws Exception { // Make sure the map does not expand; should be able to find space. VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE, -1); + false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); @@ -167,7 +167,7 @@ public void testExpand() throws Exception { // Start with capacity 1; make sure we expand on every put. VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE, -1); + false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); @@ -225,7 +225,7 @@ public void testMultipleKeysMultipleValue() throws Exception { // Use a large capacity that doesn't require expansion, yet. VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); + false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); @@ -240,7 +240,7 @@ public void testLargeAndExpand() throws Exception { // Use a large capacity that doesn't require expansion, yet. 
VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); + false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java index 793a676..ab1a829 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java @@ -23,8 +23,10 @@ import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; @@ -109,15 +111,25 @@ public static GenerateCategory generateCategoryFromPrimitiveCategory(PrimitiveCa } private GenerateCategory category; + private boolean allowNulls; public GenerateType(GenerateCategory category) { this.category = category; } + public GenerateType(GenerateCategory category, boolean allowNulls) { + this.category = category; + this.allowNulls = allowNulls; + } + public GenerateCategory getCategory() { return category; } + public boolean getAllowNulls() { + return allowNulls; + } + /* * BOOLEAN .. LONG: Min and max. 
*/ @@ -189,16 +201,24 @@ public void assignColumnVectors(VectorizedRowBatch batch, int columnNum, break; case STRING: + case CHAR: + case VARCHAR: + case BINARY: colVector = new BytesColumnVector(); break; - // UNDONE - case DATE: case TIMESTAMP: - case BINARY: + colVector = new TimestampColumnVector(); + break; + case DECIMAL: - case VARCHAR: - case CHAR: + colVector = new DecimalColumnVector(38, 18); + break; + + // UNDONE + case DATE: + + case LIST: case MAP: case STRUCT: diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorColumnGroupGenerator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorColumnGroupGenerator.java index 1064b19..22a1cd8 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorColumnGroupGenerator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorColumnGroupGenerator.java @@ -22,15 +22,19 @@ import java.util.Arrays; import java.util.Random; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.common.type.RandomTypeUtil; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.Text; @@ -39,6 +43,7 @@ private GenerateType[] generateTypes; private int[] columnNums; private Object[] arrays; + private boolean[][] isNullArrays; public VectorColumnGroupGenerator(int columnNum, GenerateType generateType) { columnNums = new int[] {columnNum}; @@ -61,6 +66,7 @@ public VectorColumnGroupGenerator(int startColumnNum, GenerateType[] generateTyp private void allocateArrays(int size) { arrays = new Object[generateTypes.length]; + isNullArrays = new boolean[generateTypes.length][]; for (int i = 0; i < generateTypes.length; i++) { GenerateType generateType = generateTypes[i]; GenerateCategory category = generateType.getCategory(); @@ -90,24 +96,34 @@ private void allocateArrays(int size) { case STRING: array = new String[size]; break; + case BINARY: + array = new byte[size][]; + break; case TIMESTAMP: array = new Timestamp[size]; break; + case CHAR: + array = new HiveChar[size]; + break; + case VARCHAR: + array = new HiveVarchar[size]; + break; + case DECIMAL: + array = new HiveDecimalWritable[size]; + break; // UNDONE case DATE: - case BINARY: - case DECIMAL: - case VARCHAR: - case CHAR: case LIST: case MAP: case STRUCT: case UNION: default: + throw new RuntimeException("Unexpected generate category " + category); } arrays[i] = array; + isNullArrays[i] = new boolean[size]; } } @@ -141,16 +157,24 @@ public void clearColumnValueArrays() { case STRING: Arrays.fill(((String[]) array), null); break; + case BINARY: + Arrays.fill(((byte[][]) array), null); + break; case TIMESTAMP: Arrays.fill(((Timestamp[]) array), null); break; + case CHAR: + Arrays.fill(((HiveChar[]) array), 
null); + break; + case VARCHAR: + Arrays.fill(((HiveVarchar[]) array), null); + break; + case DECIMAL: + Arrays.fill(((HiveDecimalWritable[]) array), null); + break; // UNDONE case DATE: - case BINARY: - case DECIMAL: - case VARCHAR: - case CHAR: case LIST: case MAP: @@ -170,6 +194,11 @@ public void generateRowValues(int rowIndex, Random random) { private void generateRowColumnValue(int rowIndex, int columnIndex, Random random) { GenerateType generateType = generateTypes[columnIndex]; GenerateCategory category = generateType.getCategory(); + boolean allowNulls = generateType.getAllowNulls(); + if (allowNulls && random.nextInt(100) < 5) { + isNullArrays[columnIndex][rowIndex] = true; + return; + } Object array = arrays[columnIndex]; switch (category) { case BOOLEAN: @@ -230,6 +259,13 @@ private void generateRowColumnValue(int rowIndex, int columnIndex, Random random } break; + case BINARY: + { + byte[] value = RandomTypeUtil.getRandBinary(random, 10); + ((byte[][]) array)[rowIndex] = value; + } + break; + case TIMESTAMP: { Timestamp value = RandomTypeUtil.getRandTimestamp(random); @@ -237,14 +273,36 @@ private void generateRowColumnValue(int rowIndex, int columnIndex, Random random } break; + case CHAR: + { + // UNDONE: Use CharTypeInfo.maxLength + HiveChar value = + new HiveChar(RandomTypeUtil.getRandString(random), 10); + ((HiveChar[]) array)[rowIndex] = value; + } + break; + + case VARCHAR: + { + // UNDONE: Use VarcharTypeInfo.maxLength + HiveVarchar value = + new HiveVarchar(RandomTypeUtil.getRandString(random), 10); + ((HiveVarchar[]) array)[rowIndex] = value; + } + break; + + case DECIMAL: + { + HiveDecimalWritable value = + new HiveDecimalWritable(RandomTypeUtil.getRandHiveDecimal(random)); + ((HiveDecimalWritable[]) array)[rowIndex] = value; + } + break; + // UNDONE case DATE: // UNDONE: Needed to longTest? 
- case BINARY: - case DECIMAL: - case VARCHAR: - case CHAR: case LIST: case MAP: @@ -263,7 +321,15 @@ public void fillDownRowValues(int rowIndex, int seriesCount, Random random) { private void fillDownRowColumnValue(int rowIndex, int columnIndex, int seriesCount, Random random) { GenerateType generateType = generateTypes[columnIndex]; GenerateCategory category = generateType.getCategory(); + boolean allowNulls = generateType.getAllowNulls(); Object array = arrays[columnIndex]; + boolean[] isNull = isNullArrays[columnIndex]; + if (allowNulls && isNull[rowIndex]) { + for (int i = 1; i < seriesCount; i++) { + isNull[rowIndex + i] = true; + } + return; + } switch (category) { case BOOLEAN: { @@ -337,6 +403,15 @@ private void fillDownRowColumnValue(int rowIndex, int columnIndex, int seriesCou } } break; + case BINARY: + { + byte[][] byteArrayArray = ((byte[][]) array); + byte[] value = byteArrayArray[rowIndex]; + for (int i = 1; i < seriesCount; i++) { + byteArrayArray[rowIndex + i] = value; + } + } + break; case TIMESTAMP: { Timestamp[] timestampArray = ((Timestamp[]) array); @@ -346,15 +421,37 @@ private void fillDownRowColumnValue(int rowIndex, int columnIndex, int seriesCou } } break; + case CHAR: + { + HiveChar[] hiveCharArray = ((HiveChar[]) array); + HiveChar value = hiveCharArray[rowIndex]; + for (int i = 1; i < seriesCount; i++) { + hiveCharArray[rowIndex + i] = value; + } + } + break; + case VARCHAR: + { + HiveVarchar[] hiveVarcharArray = ((HiveVarchar[]) array); + HiveVarchar value = hiveVarcharArray[rowIndex]; + for (int i = 1; i < seriesCount; i++) { + hiveVarcharArray[rowIndex + i] = value; + } + } + break; + case DECIMAL: + { + HiveDecimalWritable[] hiveDecimalWritableArray = ((HiveDecimalWritable[]) array); + HiveDecimalWritable value = hiveDecimalWritableArray[rowIndex]; + for (int i = 1; i < seriesCount; i++) { + hiveDecimalWritableArray[rowIndex + i] = value; + } + } + break; // UNDONE case DATE: - case BINARY: - case DECIMAL: - case VARCHAR: - case CHAR: - case LIST: case MAP: case STRUCT: @@ -389,6 +486,16 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde GenerateType generateType = generateTypes[logicalColumnIndex]; GenerateCategory category = generateType.getCategory(); + boolean allowNulls = generateType.getAllowNulls(); + boolean[] isNull = isNullArrays[logicalColumnIndex]; + if (allowNulls) { + for (int i = 0; i < size; i++) { + if (isNull[i]) { + colVector.isNull[i] = true; + colVector.noNulls = false; + } + } + } Object array = arrays[logicalColumnIndex]; switch (category) { case BOOLEAN: @@ -396,7 +503,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde boolean[] booleanArray = ((boolean[]) array); long[] vector = ((LongColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = (booleanArray[i] ? 1 : 0); + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = (booleanArray[i] ? 
1 : 0); + } } } break; @@ -405,7 +516,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde byte[] byteArray = ((byte[]) array); long[] vector = ((LongColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = byteArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = byteArray[i]; + } } } break; @@ -414,7 +529,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde short[] shortArray = ((short[]) array); long[] vector = ((LongColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = shortArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = shortArray[i]; + } } } break; @@ -423,7 +542,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde int[] intArray = ((int[]) array); long[] vector = ((LongColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = intArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = intArray[i]; + } } } break; @@ -432,7 +555,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde long[] longArray = ((long[]) array); long[] vector = ((LongColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = longArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = longArray[i]; + } } } break; @@ -441,7 +568,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde float[] floatArray = ((float[]) array); double[] vector = ((DoubleColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = floatArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = floatArray[i]; + } } } break; @@ -450,7 +581,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde double[] doubleArray = ((double[]) array); double[] vector = ((DoubleColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = doubleArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = doubleArray[i]; + } } } break; @@ -459,8 +594,22 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde String[] stringArray = ((String[]) array); BytesColumnVector bytesColVec = ((BytesColumnVector) colVector); for (int i = 0; i < size; i++) { - byte[] bytes = stringArray[i].getBytes(); - bytesColVec.setVal(i, bytes); + if (!isNull[i]) { + byte[] bytes = stringArray[i].getBytes(); + bytesColVec.setVal(i, bytes); + } + } + } + break; + case BINARY: + { + byte[][] byteArrayArray = ((byte[][]) array); + BytesColumnVector bytesColVec = ((BytesColumnVector) colVector); + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + byte[] bytes = byteArrayArray[i]; + bytesColVec.setVal(i, bytes); + } } } break; @@ -469,8 +618,46 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde Timestamp[] timestampArray = ((Timestamp[]) array); TimestampColumnVector timestampColVec = ((TimestampColumnVector) colVector); for (int i = 0; i < size; i++) { - Timestamp timestamp = timestampArray[i]; - timestampColVec.set(i, timestamp); + if (!isNull[i]) { + Timestamp timestamp = timestampArray[i]; + timestampColVec.set(i, timestamp); + } + } + } + break; + case CHAR: + { + HiveChar[] hiveCharArray = ((HiveChar[]) array); + BytesColumnVector bytesColVec = ((BytesColumnVector) colVector); + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + byte[] bytes = hiveCharArray[i].getValue().getBytes(); + 
bytesColVec.setVal(i, bytes); + } + } + } + break; + case VARCHAR: + { + HiveVarchar[] hiveCharArray = ((HiveVarchar[]) array); + BytesColumnVector bytesColVec = ((BytesColumnVector) colVector); + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + byte[] bytes = hiveCharArray[i].getValue().getBytes(); + bytesColVec.setVal(i, bytes); + } + } + } + break; + case DECIMAL: + { + HiveDecimalWritable[] hiveDecimalWritableArray = ((HiveDecimalWritable[]) array); + DecimalColumnVector decimalColVec = ((DecimalColumnVector) colVector); + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + HiveDecimalWritable decWritable = hiveDecimalWritableArray[i]; + decimalColVec.set(i, decWritable); + } } } break; @@ -479,16 +666,12 @@ case DATE: - case BINARY: - case DECIMAL: - case VARCHAR: - case CHAR: - case LIST: case MAP: case STRUCT: case UNION: default: + throw new RuntimeException("Unexpected generate category " + category); } } } \ No newline at end of file diff --git ql/src/test/queries/clientpositive/fullouter_mapjoin_1_optimized.q ql/src/test/queries/clientpositive/fullouter_mapjoin_1_optimized.q new file mode 100644 index 0000000..f3b31b4 --- /dev/null +++ ql/src/test/queries/clientpositive/fullouter_mapjoin_1_optimized.q @@ -0,0 +1,434 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.vectorized.execution.enabled=false; +set hive.mapjoin.hybridgrace.hashtable=false; +set hive.vectorized.execution.mapjoin.native.enabled=false; +set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=false; + +set hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +set hive.stats.fetch.column.stats=false; + +------------------------------------------------------------------------------------------ +-- FULL OUTER MapJoin variation for OPTIMIZED hash table implementation.
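+-- NOTE: Vectorized execution and the native vectorized MapJoin are disabled above, so these queries
+-- NOTE: exercise the row-mode FULL OUTER MapJoin with the OPTIMIZED hash table. Compare with
+-- NOTE: vector_fullouter_mapjoin_1_fast.q, which enables the vectorized FAST hash table variation.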
+------------------------------------------------------------------------------------------ + +-- SORT_QUERY_RESULTS + +------------------------------------------------------------------------------------------ +-- SHARED-MEMORY MAPJOIN +------------------------------------------------------------------------------------------ +set hive.optimize.dynamic.partition.hashjoin=true; +SET hive.auto.convert.join.noconditionaltask.size=1000000000; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt; +CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt; + +CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt; +CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt; + +CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt; +CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt; + +CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt; +CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt; + +analyze table fullouter_long_big_1a compute statistics; +analyze table fullouter_long_big_1a compute statistics for columns; +analyze table fullouter_long_big_1a_nonull compute statistics; +analyze table fullouter_long_big_1a_nonull compute statistics for columns; +analyze table fullouter_long_small_1a compute statistics; +analyze table fullouter_long_small_1a compute statistics for columns; +analyze table fullouter_long_small_1a_nonull compute statistics; +analyze table fullouter_long_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b; + +CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b; + +analyze table fullouter_long_big_1b compute statistics; +analyze table fullouter_long_big_1b compute statistics for columns; +analyze table fullouter_long_small_1b compute statistics; +analyze table fullouter_long_small_1b compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c; + +CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c; + +analyze table fullouter_long_big_1c compute statistics; +analyze table fullouter_long_big_1c compute statistics for columns; +analyze table fullouter_long_small_1c compute statistics; +analyze table fullouter_long_small_1c compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d; + +CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d; + +analyze table fullouter_long_big_1d compute statistics; +analyze table fullouter_long_big_1d compute statistics for columns; +analyze table fullouter_long_small_1d compute statistics; +analyze table fullouter_long_small_1d compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL 
INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt; +CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt; + +CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt; +CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt; + +CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt; +CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt; + +CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt; +CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt; + +analyze table fullouter_multikey_big_1a compute statistics; +analyze table fullouter_multikey_big_1a compute statistics for columns; +analyze table fullouter_multikey_big_1a_nonull compute statistics; +analyze table fullouter_multikey_big_1a_nonull compute statistics for columns; +analyze table fullouter_multikey_small_1a compute statistics; +analyze table fullouter_multikey_small_1a compute statistics for columns; +analyze table fullouter_multikey_small_1a_nonull compute statistics; +analyze table fullouter_multikey_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt; +CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt; + +CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt; +CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt; + +analyze table fullouter_multikey_big_1b_txt compute statistics; +analyze table fullouter_multikey_big_1b_txt compute statistics for columns; +analyze table fullouter_multikey_small_1b_txt compute statistics; +analyze table fullouter_multikey_small_1b_txt compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt; +CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt; + +CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt; +CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt; + +CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt; +CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt; + +CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt; +CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt; + +analyze table fullouter_string_big_1a compute statistics; +analyze table fullouter_string_big_1a compute statistics for columns; 
+analyze table fullouter_string_big_1a_nonull compute statistics; +analyze table fullouter_string_big_1a_nonull compute statistics for columns; +analyze table fullouter_string_small_1a compute statistics; +analyze table fullouter_string_small_1a compute statistics for columns; +analyze table fullouter_string_small_1a_nonull compute statistics; +analyze table fullouter_string_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + + +------------------------------------------------------------------------------------------ +-- DYNAMIC PARTITION HASH JOIN +------------------------------------------------------------------------------------------ + +set hive.optimize.dynamic.partition.hashjoin=true; + +-- NOTE: Use very small sizes here to skip SHARED MEMORY MapJoin and force usage +-- NOTE: of DYNAMIC PARTITION HASH JOIN instead. +set hive.auto.convert.join.noconditionaltask.size=500; +set hive.exec.reducers.bytes.per.reducer=500; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + + diff --git ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q index ff4cde2..ee9a89c 100644 --- ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q +++ ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q @@ -22,3 +22,9 @@ select a.* from alltypesorc a left outer join src b on a.cint = cast(b.key as int) and (a.cint < 100) limit 1; + +explain +select a.* +from alltypesorc a left outer join src b +on a.cint = cast(b.key as int) +limit 1; diff --git ql/src/test/queries/clientpositive/vector_full_outer_join.q ql/src/test/queries/clientpositive/vector_full_outer_join.q new file mode 100644 index 0000000..63565f1 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_full_outer_join.q @@ -0,0 +1,80 @@ +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; + +-- SORT_QUERY_RESULTS + +drop table if exists TJOIN1; +drop table if exists TJOIN2; +create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc; +create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc; +create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE ; +create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE ; +LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE; +LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE; +INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE; +INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE; + +set hive.vectorized.execution.enabled=false; +set hive.mapjoin.hybridgrace.hashtable=false; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +set hive.vectorized.execution.enabled=false; +set hive.mapjoin.hybridgrace.hashtable=true; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + 
+select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + + +set hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=false; +SET hive.vectorized.execution.mapjoin.native.enabled=false; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +set hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=true; +SET hive.vectorized.execution.mapjoin.native.enabled=false; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +set hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=false; +SET hive.vectorized.execution.mapjoin.native.enabled=true; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +set hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=true; +SET hive.vectorized.execution.mapjoin.native.enabled=true; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + + +-- Omit tjoin2.c1 +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +-- Omit tjoin2.c1 and tjoin2.c2 +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); diff --git ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_fast.q ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_fast.q new file mode 100644 index 0000000..880b95f --- /dev/null +++ ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_fast.q @@ -0,0 +1,434 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=false; +set hive.vectorized.execution.mapjoin.native.enabled=true; +set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=true; + +set hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +set hive.stats.fetch.column.stats=false; + +------------------------------------------------------------------------------------------ +-- FULL OUTER Vectorized Native MapJoin variation for FAST hash table implementation. 
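+-- NOTE: hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled is set to true above, which
+-- NOTE: selects the FAST hash table implementation (the VectorMapJoinFast* classes) for the
+-- NOTE: vectorized native FULL OUTER MapJoin.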
+------------------------------------------------------------------------------------------ + +-- SORT_QUERY_RESULTS + +------------------------------------------------------------------------------------------ +-- SHARED-MEMORY MAPJOIN +------------------------------------------------------------------------------------------ +set hive.optimize.dynamic.partition.hashjoin=true; +SET hive.auto.convert.join.noconditionaltask.size=1000000000; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt; +CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt; + +CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt; +CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt; + +CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt; +CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt; + +CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt; +CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt; + +analyze table fullouter_long_big_1a compute statistics; +analyze table fullouter_long_big_1a compute statistics for columns; +analyze table fullouter_long_big_1a_nonull compute statistics; +analyze table fullouter_long_big_1a_nonull compute statistics for columns; +analyze table fullouter_long_small_1a compute statistics; +analyze table fullouter_long_small_1a compute statistics for columns; +analyze table fullouter_long_small_1a_nonull compute statistics; +analyze table fullouter_long_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b; + +CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b; + +analyze table fullouter_long_big_1b compute statistics; +analyze table fullouter_long_big_1b compute statistics for columns; +analyze table fullouter_long_small_1b compute statistics; +analyze table fullouter_long_small_1b compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c; + +CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c; + +analyze table fullouter_long_big_1c compute statistics; +analyze table fullouter_long_big_1c compute statistics for columns; +analyze table fullouter_long_small_1c compute statistics; +analyze table fullouter_long_small_1c compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d; + +CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d; + +analyze table fullouter_long_big_1d compute statistics; +analyze table fullouter_long_big_1d compute statistics for columns; +analyze table fullouter_long_small_1d compute statistics; +analyze table fullouter_long_small_1d compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL 
INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt; +CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt; + +CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt; +CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt; + +CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt; +CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt; + +CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt; +CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt; + +analyze table fullouter_multikey_big_1a compute statistics; +analyze table fullouter_multikey_big_1a compute statistics for columns; +analyze table fullouter_multikey_big_1a_nonull compute statistics; +analyze table fullouter_multikey_big_1a_nonull compute statistics for columns; +analyze table fullouter_multikey_small_1a compute statistics; +analyze table fullouter_multikey_small_1a compute statistics for columns; +analyze table fullouter_multikey_small_1a_nonull compute statistics; +analyze table fullouter_multikey_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt; +CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt; + +CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt; +CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt; + +analyze table fullouter_multikey_big_1b_txt compute statistics; +analyze table fullouter_multikey_big_1b_txt compute statistics for columns; +analyze table fullouter_multikey_small_1b_txt compute statistics; +analyze table fullouter_multikey_small_1b_txt compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt; +CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt; + +CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt; +CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt; + +CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt; +CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt; + +CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt; +CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt; + +analyze table fullouter_string_big_1a compute statistics; +analyze table fullouter_string_big_1a compute statistics for columns; 
+analyze table fullouter_string_big_1a_nonull compute statistics; +analyze table fullouter_string_big_1a_nonull compute statistics for columns; +analyze table fullouter_string_small_1a compute statistics; +analyze table fullouter_string_small_1a compute statistics for columns; +analyze table fullouter_string_small_1a_nonull compute statistics; +analyze table fullouter_string_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + + +------------------------------------------------------------------------------------------ +-- DYNAMIC PARTITION HASH JOIN +------------------------------------------------------------------------------------------ + +set hive.optimize.dynamic.partition.hashjoin=true; + +-- NOTE: Use very small sizes here to skip SHARED MEMORY MapJoin and force usage +-- NOTE: of DYNAMIC PARTITION HASH JOIN instead. +set hive.auto.convert.join.noconditionaltask.size=500; +set hive.exec.reducers.bytes.per.reducer=500; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + + diff --git ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized.q ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized.q new file mode 100644 index 0000000..cb25989 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized.q @@ -0,0 +1,434 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=false; +set hive.vectorized.execution.mapjoin.native.enabled=true; +set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=false; + +set hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +set hive.stats.fetch.column.stats=false; + +------------------------------------------------------------------------------------------ +-- FULL OUTER Vectorized Native MapJoin variation for OPTIMIZED hash table implementation. 
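+-- NOTE: The settings above enable the native vectorized MapJoin while leaving the FAST hash table disabled, so these queries exercise the OPTIMIZED hash table code path.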
+------------------------------------------------------------------------------------------ + +-- SORT_QUERY_RESULTS + +------------------------------------------------------------------------------------------ +-- SHARED-MEMORY MAPJOIN +------------------------------------------------------------------------------------------ +set hive.optimize.dynamic.partition.hashjoin=true; +SET hive.auto.convert.join.noconditionaltask.size=1000000000; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt; +CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt; + +CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt; +CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt; + +CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt; +CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt; + +CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt; +CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt; + +analyze table fullouter_long_big_1a compute statistics; +analyze table fullouter_long_big_1a compute statistics for columns; +analyze table fullouter_long_big_1a_nonull compute statistics; +analyze table fullouter_long_big_1a_nonull compute statistics for columns; +analyze table fullouter_long_small_1a compute statistics; +analyze table fullouter_long_small_1a compute statistics for columns; +analyze table fullouter_long_small_1a_nonull compute statistics; +analyze table fullouter_long_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b; + +CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b; + +analyze table fullouter_long_big_1b compute statistics; +analyze table fullouter_long_big_1b compute statistics for columns; +analyze table fullouter_long_small_1b compute statistics; +analyze table fullouter_long_small_1b compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c; + +CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c; + +analyze table fullouter_long_big_1c compute statistics; +analyze table fullouter_long_big_1c compute statistics for columns; +analyze table fullouter_long_small_1c compute statistics; +analyze table fullouter_long_small_1c compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d; + +CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d; + +analyze table fullouter_long_big_1d compute statistics; +analyze table fullouter_long_big_1d compute statistics for columns; +analyze table fullouter_long_small_1d compute statistics; +analyze table fullouter_long_small_1d compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL 
INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt; +CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt; + +CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt; +CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt; + +CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt; +CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt; + +CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt; +CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt; + +analyze table fullouter_multikey_big_1a compute statistics; +analyze table fullouter_multikey_big_1a compute statistics for columns; +analyze table fullouter_multikey_big_1a_nonull compute statistics; +analyze table fullouter_multikey_big_1a_nonull compute statistics for columns; +analyze table fullouter_multikey_small_1a compute statistics; +analyze table fullouter_multikey_small_1a compute statistics for columns; +analyze table fullouter_multikey_small_1a_nonull compute statistics; +analyze table fullouter_multikey_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt; +CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt; + +CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt; +CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt; + +analyze table fullouter_multikey_big_1b_txt compute statistics; +analyze table fullouter_multikey_big_1b_txt compute statistics for columns; +analyze table fullouter_multikey_small_1b_txt compute statistics; +analyze table fullouter_multikey_small_1b_txt compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt; +CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt; + +CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt; +CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt; + +CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt; +CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt; + +CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt; +CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt; + +analyze table fullouter_string_big_1a compute statistics; +analyze table fullouter_string_big_1a compute statistics for columns; 
+analyze table fullouter_string_big_1a_nonull compute statistics; +analyze table fullouter_string_big_1a_nonull compute statistics for columns; +analyze table fullouter_string_small_1a compute statistics; +analyze table fullouter_string_small_1a compute statistics for columns; +analyze table fullouter_string_small_1a_nonull compute statistics; +analyze table fullouter_string_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + + +------------------------------------------------------------------------------------------ +-- DYNAMIC PARTITION HASH JOIN +------------------------------------------------------------------------------------------ + +set hive.optimize.dynamic.partition.hashjoin=true; + +-- NOTE: Use very small sizes here to skip SHARED MEMORY MapJoin and force usage +-- NOTE: of DYNAMIC PARTITION HASH JOIN instead. +set hive.auto.convert.join.noconditionaltask.size=500; +set hive.exec.reducers.bytes.per.reducer=500; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + + diff --git ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized_passthru.q ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized_passthru.q new file mode 100644 index 0000000..7041b44 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized_passthru.q @@ -0,0 +1,434 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=false; +set hive.vectorized.execution.mapjoin.native.enabled=false; +set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=false; + +set hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +set hive.stats.fetch.column.stats=false; + +------------------------------------------------------------------------------------------ +-- FULL OUTER Vectorized PASS-THRU Mode MapJoin variation for OPTIMIZED hash table implementation.
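+-- NOTE: With hive.vectorized.execution.mapjoin.native.enabled=false above, the vectorized MapJoin operator passes row batches through to the row-mode MapJoin hash table code path instead of using a native vector hash table.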
+------------------------------------------------------------------------------------------ + +-- SORT_QUERY_RESULTS + +------------------------------------------------------------------------------------------ +-- SHARED-MEMORY MAPJOIN +------------------------------------------------------------------------------------------ +set hive.optimize.dynamic.partition.hashjoin=true; +SET hive.auto.convert.join.noconditionaltask.size=1000000000; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt; +CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt; + +CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt; +CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt; + +CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt; +CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt; + +CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt; +CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt; + +analyze table fullouter_long_big_1a compute statistics; +analyze table fullouter_long_big_1a compute statistics for columns; +analyze table fullouter_long_big_1a_nonull compute statistics; +analyze table fullouter_long_big_1a_nonull compute statistics for columns; +analyze table fullouter_long_small_1a compute statistics; +analyze table fullouter_long_small_1a compute statistics for columns; +analyze table fullouter_long_small_1a_nonull compute statistics; +analyze table fullouter_long_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b; + +CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b; + +analyze table fullouter_long_big_1b compute statistics; +analyze table fullouter_long_big_1b compute statistics for columns; +analyze table fullouter_long_small_1b compute statistics; +analyze table fullouter_long_small_1b compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c; + +CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c; + +analyze table fullouter_long_big_1c compute statistics; +analyze table fullouter_long_big_1c compute statistics for columns; +analyze table fullouter_long_small_1c compute statistics; +analyze table fullouter_long_small_1c compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d; + +CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d; + +analyze table fullouter_long_big_1d compute statistics; +analyze table fullouter_long_big_1d compute statistics for columns; +analyze table fullouter_long_small_1d compute statistics; +analyze table fullouter_long_small_1d compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL 
INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt; +CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt; + +CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt; +CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt; + +CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt; +CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt; + +CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt; +CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt; + +analyze table fullouter_multikey_big_1a compute statistics; +analyze table fullouter_multikey_big_1a compute statistics for columns; +analyze table fullouter_multikey_big_1a_nonull compute statistics; +analyze table fullouter_multikey_big_1a_nonull compute statistics for columns; +analyze table fullouter_multikey_small_1a compute statistics; +analyze table fullouter_multikey_small_1a compute statistics for columns; +analyze table fullouter_multikey_small_1a_nonull compute statistics; +analyze table fullouter_multikey_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt; +CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt; + +CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt; +CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt; + +analyze table fullouter_multikey_big_1b_txt compute statistics; +analyze table fullouter_multikey_big_1b_txt compute statistics for columns; +analyze table fullouter_multikey_small_1b_txt compute statistics; +analyze table fullouter_multikey_small_1b_txt compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt; +CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt; + +CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt; +CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt; + +CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt; +CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt; + +CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt; +CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt; + +analyze table fullouter_string_big_1a compute statistics; +analyze table fullouter_string_big_1a compute statistics for columns; 
+analyze table fullouter_string_big_1a_nonull compute statistics; +analyze table fullouter_string_big_1a_nonull compute statistics for columns; +analyze table fullouter_string_small_1a compute statistics; +analyze table fullouter_string_small_1a compute statistics for columns; +analyze table fullouter_string_small_1a_nonull compute statistics; +analyze table fullouter_string_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + + +------------------------------------------------------------------------------------------ +-- DYNAMIC PARTITION HASH JOIN +------------------------------------------------------------------------------------------ + +set hive.optimize.dynamic.partition.hashjoin=true; + +-- NOTE: Use very small sizes here to skip SHARED MEMORY MapJoin and force usage +-- NOTE: of DYNAMIC PARTITION HASH JOIN instead. +set hive.auto.convert.join.noconditionaltask.size=500; +set hive.exec.reducers.bytes.per.reducer=500; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + + diff --git ql/src/test/queries/clientpositive/vector_left_outer_join2.q ql/src/test/queries/clientpositive/vector_left_outer_join2.q index ccceb36..84f656b 100644 --- ql/src/test/queries/clientpositive/vector_left_outer_join2.q +++ ql/src/test/queries/clientpositive/vector_left_outer_join2.q @@ -1,3 +1,4 @@ +set hive.cli.print.header=true; set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.fetch.task.conversion=none; @@ -20,14 +21,14 @@ INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE; set hive.vectorized.execution.enabled=false; set hive.mapjoin.hybridgrace.hashtable=false; -explain vectorization expression +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); set hive.vectorized.execution.enabled=false; set hive.mapjoin.hybridgrace.hashtable=true; -explain vectorization expression +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); @@ -36,7 +37,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=false; SET hive.vectorized.execution.mapjoin.native.enabled=false; -explain vectorization expression +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); @@ -44,7 +45,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=true; SET hive.vectorized.execution.mapjoin.native.enabled=false; -explain vectorization expression +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); @@ -52,7 +53,7 @@ 
select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=false; SET hive.vectorized.execution.mapjoin.native.enabled=true; -explain vectorization expression +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); @@ -60,7 +61,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=true; SET hive.vectorized.execution.mapjoin.native.enabled=true; -explain vectorization expression +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vectorized_join46.q ql/src/test/queries/clientpositive/vectorized_join46.q index af155cc..7be2b0e 100644 --- ql/src/test/queries/clientpositive/vectorized_join46.q +++ ql/src/test/queries/clientpositive/vectorized_join46.q @@ -1,3 +1,4 @@ +set hive.cli.print.header=true; set hive.vectorized.execution.enabled=true; set hive.auto.convert.join=true; set hive.strict.checks.cartesian.product=false; @@ -15,7 +16,7 @@ INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'), -- Basic outer join -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value); @@ -25,7 +26,7 @@ FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value); -- Conjunction with pred on multiple inputs and single inputs (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value @@ -39,7 +40,7 @@ ON (test1.value=test2.value AND test2.key between 100 and 102); -- Conjunction with pred on single inputs (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.key between 100 and 102 @@ -51,7 +52,7 @@ ON (test1.key between 100 and 102 AND test2.key between 100 and 102); -- Conjunction with pred on multiple inputs and none (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value AND true); @@ -61,7 +62,7 @@ FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value AND true); -- Condition on one input (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.key between 100 and 102); @@ -71,7 +72,7 @@ FROM test1 LEFT OUTER JOIN test2 ON (test1.key between 100 and 102); -- Disjunction with pred on multiple inputs and single inputs (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value @@ -85,7 +86,7 @@ ON (test1.value=test2.value OR test2.key between 100 and 102); -- Disjunction with pred on multiple inputs and left input (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value @@ -97,7 +98,7 @@ ON (test1.value=test2.value OR test1.key between 100 and 102); -- 
Disjunction with pred on multiple inputs and right input (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value @@ -109,7 +110,7 @@ ON (test1.value=test2.value OR test2.key between 100 and 102); -- Keys plus residual (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value @@ -123,7 +124,7 @@ ON (test1.value=test2.value OR test2.key between 100 and 102)); -- Disjunction with pred on multiple inputs and single inputs (right outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value @@ -137,7 +138,7 @@ ON (test1.value=test2.value OR test2.key between 100 and 102); -- Disjunction with pred on multiple inputs and left input (right outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value @@ -149,7 +150,7 @@ ON (test1.value=test2.value OR test1.key between 100 and 102); -- Disjunction with pred on multiple inputs and right input (right outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value @@ -161,7 +162,7 @@ ON (test1.value=test2.value OR test2.key between 100 and 102); -- Keys plus residual (right outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value @@ -175,7 +176,7 @@ ON (test1.value=test2.value OR test2.key between 100 and 102)); -- Disjunction with pred on multiple inputs and single inputs (full outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value @@ -189,7 +190,7 @@ ON (test1.value=test2.value OR test2.key between 100 and 102); -- Disjunction with pred on multiple inputs and left input (full outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value @@ -201,7 +202,7 @@ ON (test1.value=test2.value OR test1.key between 100 and 102); -- Disjunction with pred on multiple inputs and right input (full outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value @@ -213,7 +214,7 @@ ON (test1.value=test2.value OR test2.key between 100 and 102); -- Keys plus residual (full outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value diff --git ql/src/test/queries/clientpositive/vectorized_join46_mr.q ql/src/test/queries/clientpositive/vectorized_join46_mr.q new file mode 100644 index 0000000..7be2b0e --- /dev/null +++ ql/src/test/queries/clientpositive/vectorized_join46_mr.q @@ -0,0 +1,228 @@ +set hive.cli.print.header=true; +set hive.vectorized.execution.enabled=true; +set hive.auto.convert.join=true; +set hive.strict.checks.cartesian.product=false; +set hive.join.emit.interval=2; + +-- SORT_QUERY_RESULTS + +CREATE TABLE test1 (key INT, value INT, col_1 STRING); +INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'), + (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car'); + +CREATE TABLE test2 (key INT, value INT, col_2 STRING); +INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'), + (104, 3, 'Fli'), (105, NULL, 'None'); + + +-- Basic outer join +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON 
(test1.value=test2.value); + +-- Conjunction with pred on multiple inputs and single inputs (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND test1.key between 100 and 102 + AND test2.key between 100 and 102); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND test1.key between 100 and 102 + AND test2.key between 100 and 102); + +-- Conjunction with pred on single inputs (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102 + AND test2.key between 100 and 102); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102 + AND test2.key between 100 and 102); + +-- Conjunction with pred on multiple inputs and none (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value AND true); + +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value AND true); + +-- Condition on one input (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and single inputs (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and left input (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and right input (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +-- Keys plus residual (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +-- Disjunction with pred on multiple inputs and single inputs (right outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and left input (right outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and right input (right outer join) 
+EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +-- Keys plus residual (right outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +-- Disjunction with pred on multiple inputs and single inputs (full outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and left input (full outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and right input (full outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +-- Keys plus residual (full outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); diff --git ql/src/test/queries/clientpositive/vectorized_join46_one_full.q ql/src/test/queries/clientpositive/vectorized_join46_one_full.q new file mode 100644 index 0000000..cb9e0f0 --- /dev/null +++ ql/src/test/queries/clientpositive/vectorized_join46_one_full.q @@ -0,0 +1,68 @@ +set hive.cli.print.header=true; +set hive.vectorized.execution.enabled=true; +set hive.auto.convert.join=true; +set hive.strict.checks.cartesian.product=false; +set hive.join.emit.interval=2; + +-- SORT_QUERY_RESULTS + +CREATE TABLE test1 (key INT, value INT, col_1 STRING); +INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'), + (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car'); + +CREATE TABLE test2 (key INT, value INT, col_2 STRING); +INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'), + (104, 3, 'Fli'), (105, NULL, 'None'); + + +-- Disjunction with pred on multiple inputs and single inputs (full outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and left input (full outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + 
+SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and right input (full outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +-- Keys plus residual (full outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); diff --git ql/src/test/results/clientpositive/fullouter_mapjoin_1.q.out ql/src/test/results/clientpositive/fullouter_mapjoin_1.q.out new file mode 100644 index 0000000..0ae9df9 --- /dev/null +++ ql/src/test/results/clientpositive/fullouter_mapjoin_1.q.out @@ -0,0 +1,176 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a +PREHOOK: query: CREATE TABLE fullouter_long_small_1a(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE 
PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: s + Statistics: Num rows: 1 Data size: 1640 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1640 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 1640 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL 
-6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 diff --git ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out new file mode 100644 index 0000000..4afac08 --- /dev/null +++ ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out @@ -0,0 +1,5855 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: Lineage: fullouter_long_small_1a.key SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a.s_date SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt 
+POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.key SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.s_date SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: 
default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 
214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 
2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: 
default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: 
default@fullouter_long_big_1b +PREHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 
(SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 
+NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: CREATE TABLE fullouter_long_small_1c(key 
int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map 
Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 
-69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 
90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: 
query: analyze table fullouter_long_small_1d compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: 
default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: Lineage: fullouter_multikey_big_1a.key0 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a.key1 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited 
fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key0 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key1 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: Lineage: fullouter_multikey_small_1a.key0 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key0, 
type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a.key1 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key0 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key1 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: analyze table 
fullouter_multikey_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 
2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 
NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL 
NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 
1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A 
masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: 
Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: Lineage: fullouter_multikey_big_1b.key0 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key1 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key2 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: Lineage: fullouter_multikey_small_1b.key0 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key1 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: 
fullouter_multikey_small_1b.key2 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.s_decimal SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:s_decimal, type:decimal(38,18), comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: 
string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s 
ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 
+NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 
15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: 
database:default +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: Lineage: fullouter_string_big_1a.key SIMPLE [(fullouter_string_big_1a_txt)fullouter_string_big_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: Lineage: fullouter_string_big_1a_nonull.key SIMPLE [(fullouter_string_big_1a_nonull_txt)fullouter_string_big_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, 
s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: Lineage: fullouter_string_small_1a.key SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_date SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_timestamp SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: Output: 
database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.key SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_date SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_timestamp SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +PREHOOK: type: QUERY 
+PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: 
date), _col3 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, 
s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 
2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 
2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: 
bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, 
s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### 
+POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 
Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL 
-2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 
(type: bigint) + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 
+NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: 
timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 
+NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 89 Data size: 1873 Basic stats: 
COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 
+NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + 
Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 
1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL 
BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + 
+ Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 
1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + 
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: 
EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A 
masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + 
Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL 
-11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num 
rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 
1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL 
NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 
(type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 
+NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data 
size: 1729 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: 
SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 
+NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 
2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL diff --git ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out index b63b25f..20acb91 100644 --- ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out 
+++ ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out @@ -412,7 +412,7 @@ NULL 6 PREHOOK: query: explain select * -from alltypesorc a join alltypesorc b on a.cint = b.cint +from alltypesorc a left outer join alltypesorc b on a.cint = b.cint where a.cint between 1000000 and 3000000 and b.cbigint is not null order by a.cint @@ -420,7 +420,7 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * -from alltypesorc a join alltypesorc b on a.cint = b.cint +from alltypesorc a left outer join alltypesorc b on a.cint = b.cint where a.cint between 1000000 and 3000000 and b.cbigint is not null order by a.cint @@ -491,6 +491,7 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Reduce Output Operator key expressions: _col2 (type: int) @@ -623,6 +624,7 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Group By Operator aggregations: count() @@ -756,6 +758,7 @@ STAGE PLANS: input vertices: 1 Map 5 Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Group By Operator aggregations: count() diff --git ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out index 4f557d3..2abe505 100644 --- ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out +++ ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out @@ -204,3 +204,99 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: explain +select a.* +from alltypesorc a left outer join src b +on a.cint = cast(b.key as int) +limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.* +from alltypesorc a left outer join src b +on a.cint = cast(b.key as int) +limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), 
_col11 (type: boolean) + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: UDFToInteger(_col0) (type: int) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: int), VALUE._col3 (type: bigint), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: string), VALUE._col7 (type: string), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 UDFToInteger(_col0) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + input vertices: + 1 Map 3 + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out new file mode 100644 index 0000000..702f72c --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out @@ -0,0 +1,1172 @@ +PREHOOK: query: drop table if exists TJOIN1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists TJOIN2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1 +POSTHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1 +PREHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2 +POSTHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc 
+POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2 +PREHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1STAGE +POSTHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1STAGE +PREHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2STAGE +POSTHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2STAGE +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin1stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin1stage +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin2stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin2stage +PREHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1stage +PREHOOK: Output: default@tjoin1 +POSTHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1stage +POSTHOOK: Output: default@tjoin1 +POSTHOOK: Lineage: tjoin1.c1 SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin1.c2 EXPRESSION [(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin1.rnum SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ] +_col0 _col1 _col2 +PREHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin2stage +PREHOOK: Output: default@tjoin2 +POSTHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin2stage +POSTHOOK: Output: default@tjoin2 +POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] +tjoin2stage.rnum tjoin2stage.c1 tjoin2stage.c2 +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 
from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 2 + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: all inputs + Map 2 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: char(2)) + Execution mode: llap + LLAP IO: all inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c1j2 c2j2 +0 10 15 10 BB +0 10 15 10 FF +1 20 25 NULL NULL +2 NULL 50 NULL NULL +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: false + 
enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 2 + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: all inputs + Map 2 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: char(2)) + Execution mode: llap + LLAP IO: all inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c1j2 c2j2 +0 10 15 10 BB +0 10 15 10 FF +1 20 25 NULL NULL +2 NULL 50 NULL NULL +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: 
tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 2 + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string] + Map 2 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: char(2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + 
inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c1j2 c2j2 +0 10 15 10 BB +0 10 15 10 FF +1 20 25 NULL NULL +2 NULL 50 NULL NULL +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 2 + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string] + Map 2 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: char(2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c1j2 c2j2 +0 10 15 10 BB +0 10 15 10 FF +1 20 25 NULL NULL +2 NULL 50 NULL NULL +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + 
enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumnNums: [1] + bigTableOuterKeyMapping: 1 -> 4 + bigTableRetainedColumnNums: [0, 1, 2, 4] + bigTableValueColumnNums: [0, 1, 2] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumnNums: [0, 1, 2, 4, 5] + smallTableMapping: [5] + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 2 + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string] + Map 2 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [1] + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: char(2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c1j2 c2j2 +0 10 15 10 BB +0 10 15 10 FF +1 20 25 NULL NULL +2 NULL 50 NULL NULL +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumnNums: [1] + bigTableOuterKeyMapping: 1 -> 4 + bigTableRetainedColumnNums: [0, 1, 2, 4] + bigTableValueColumnNums: [0, 1, 2] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS 
true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumnNums: [0, 1, 2, 4, 5] + smallTableMapping: [5] + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 2 + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string] + Map 2 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: char(2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( 
tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c1j2 c2j2 +0 10 15 10 BB +0 10 15 10 FF +1 20 25 NULL NULL +2 NULL 50 NULL NULL +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumnNums: [1] + bigTableRetainedColumnNums: [0, 1, 2] + bigTableValueColumnNums: [0, 1, 2] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumnNums: [0, 1, 2, 4] + smallTableMapping: [4] + outputColumnNames: _col0, _col1, _col2, _col4 + input vertices: + 1 Map 2 + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4] + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + 
vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [string] + Map 2 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: char(2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 +0 10 15 BB +0 10 15 FF +1 20 25 NULL +2 NULL 50 NULL +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 
1:c1:int, 2:c2:int, 3:ROW__ID:struct] + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumnNums: [1] + bigTableRetainedColumnNums: [0, 1, 2] + bigTableValueColumnNums: [0, 1, 2] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumnNums: [0, 1, 2] + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 2 + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 4 Data size: 17 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] + Select Operator + expressions: c1 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: 
false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 +0 10 15 +0 10 15 +1 20 25 +2 NULL 50 diff --git ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out new file mode 100644 index 0000000..ad0eb95 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out @@ -0,0 +1,8738 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: 
default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: Lineage: fullouter_long_small_1a.key SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a.s_date SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt 
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.key SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.s_date SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: 
default@fullouter_long_small_1a +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:bigint + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:bigint, 2:bigint, 3:date + smallTableValueMapping: 3:date + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:bigint, 3:date + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:bigint + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join 
has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:bigint, 1:bigint, 2:date + smallTableValueMapping: 2:date + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint, 2:date + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 
1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key 
+PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL 
+-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:smallint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num 
rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:smallint + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:smallint, 2:smallint, 3:timestamp + smallTableValueMapping: 3:timestamp + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:timestamp + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, timestamp] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:smallint, 1:s_timestamp:timestamp, 2:ROW__ID:struct] + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 
0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:timestamp + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:timestamp + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:smallint, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:smallint, VALUE._col0:smallint, VALUE._col1:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, timestamp] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 
(type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:smallint + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:smallint, 1:smallint, 2:timestamp + smallTableValueMapping: 2:timestamp + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:smallint, 2:timestamp + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 
+NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### 
+PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:b_string:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + 
condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:int, 1:string, 3:int, 4:decimal(38,18) + smallTableValueMapping: 4:decimal(38,18) + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 3:int, 4:decimal(38,18) + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, b_string:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(38,18)] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:s_decimal:decimal(38,18), 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + 
keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:decimal(38,18) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:decimal(38,18) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string, VALUE._col1:int, VALUE._col2:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, 
decimal(38,18)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:int, 1:string, 2:int, 3:decimal(38,18) + smallTableValueMapping: 3:decimal(38,18) + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:int, 3:decimal(38,18) + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 
+NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited 
fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key 
FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:int, 2:int + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:int + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + 
dataColumns: key:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, 
llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:int, 1:int + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:int + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 
+NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: Lineage: fullouter_multikey_big_1a.key0 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a.key1 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: CREATE TABLE 
fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key0 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key1 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: Lineage: fullouter_multikey_small_1a.key0 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a.key1 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL 
INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key0 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key1 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:smallint, 1:int + className: VectorMapJoinFullOuterMultiKeyOperator + 
fullOuterSmallTableKeyMapping: 0 -> 3, 1 -> 4 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:smallint, 1:int, 3:smallint, 4:int + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:smallint, 4:int + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key 
expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:smallint, 1:int + className: 
VectorMapJoinFullOuterIntersectMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 2, 1 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:smallint, 1:int, 2:smallint, 3:int + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL 
-7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 
+NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 
1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 
+NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: Lineage: fullouter_multikey_big_1b.key0 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key1 SIMPLE 
[(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key2 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: Lineage: fullouter_multikey_small_1b.key0 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key1 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key2 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.s_decimal SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:s_decimal, type:decimal(38,18), comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for 
columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:ROW__ID:struct] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:timestamp, 1:smallint, 2:string + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:timestamp, 1:smallint, 2:string + className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 4, 1 -> 5, 2 -> 6 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:timestamp, 1:smallint, 2:string, 4:timestamp, 5:smallint, 6:string, 7:decimal(38,18) + smallTableValueMapping: 7:decimal(38,18) + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 5 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:string, 4:timestamp, 5:smallint, 6:string, 7:decimal(38,18) + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key0:timestamp, key1:smallint, key2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [timestamp, bigint, string, decimal(38,18)] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:s_decimal:decimal(38,18), 4:ROW__ID:struct] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:decimal(38,18) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:decimal(38,18) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2, 3] + dataColumns: key0:timestamp, key1:smallint, key2:string, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 7 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, VALUE._col0:string, VALUE._col1:timestamp, VALUE._col2:smallint, VALUE._col3:string, VALUE._col4:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: true + 
usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, KEY.reducesinkkey2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [timestamp, bigint, string, decimal(38,18)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:timestamp, 1:smallint, 2:string + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:timestamp, 1:smallint, 2:string + className: VectorMapJoinFullOuterIntersectMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 3, 1 -> 4, 2 -> 5 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:timestamp, 1:smallint, 2:string, 3:timestamp, 4:smallint, 5:string, 6:decimal(38,18) + smallTableValueMapping: 6:decimal(38,18) + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 5 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:string, 3:timestamp, 4:smallint, 5:string, 6:decimal(38,18) + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b 
+#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 
0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 
41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE 
fullouter_string_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: Lineage: fullouter_string_big_1a.key SIMPLE [(fullouter_string_big_1a_txt)fullouter_string_big_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: Lineage: fullouter_string_big_1a_nonull.key SIMPLE [(fullouter_string_big_1a_nonull_txt)fullouter_string_big_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@fullouter_string_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: Lineage: fullouter_string_small_1a.key SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_date SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_timestamp SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.key SIMPLE 
[(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_date SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_timestamp SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: analyze table 
fullouter_string_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinFullOuterStringOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:string, 2:string, 3:date, 4:timestamp + smallTableValueMapping: 3:date, 4:timestamp + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:string, 3:date, 4:timestamp + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: 
false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:string, s_date:date, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinFullOuterIntersectStringOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:string, 1:string, 2:date, 3:timestamp + smallTableValueMapping: 2:date, 3:timestamp + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:date, 3:timestamp + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: 
Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 
16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 
1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 
54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:bigint + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:bigint, 1:bigint, 2:date + smallTableValueMapping: 2:date + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint, 2:date + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez 
IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 
+NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column 
stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:bigint + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:bigint, 1:bigint, 2:date + smallTableValueMapping: 2:date + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint, 2:date + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + 
reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 
+NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + 
Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:bigint + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:bigint, 1:bigint, 2:date + smallTableValueMapping: 2:date + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint, 2:date + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + 
reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 
2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) 
+ sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:bigint + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:bigint, 1:bigint, 2:date + smallTableValueMapping: 2:date + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint, 2:date + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: 
false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 
2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:smallint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:smallint, 1:s_timestamp:timestamp, 2:ROW__ID:struct] + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce 
Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:timestamp + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:smallint, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, timestamp] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint) + 1 KEY.reducesinkkey0 (type: smallint) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:smallint + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:smallint, 1:smallint, 2:timestamp + smallTableValueMapping: 2:timestamp + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:smallint, 2:timestamp + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:smallint, VALUE._col0:smallint, VALUE._col1:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 
+NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:b_string:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + 
inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, b_string:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:s_decimal:decimal(38,18), 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:decimal(38,18) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(38,18)] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:int, 
1:string, 2:int, 3:decimal(38,18) + smallTableValueMapping: 3:decimal(38,18) + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:int, 3:decimal(38,18) + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string, VALUE._col1:int, VALUE._col2:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 
71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 
4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:int, 2:int + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:int + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + 
inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: 
false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:int, 1:int + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:int + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL 
-497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 38 Data size: 6606 
Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:string, s_date:date, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinFullOuterStringOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:string, 1:string, 2:date, 3:timestamp + smallTableValueMapping: 2:date, 3:timestamp + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:date, 3:timestamp + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + 
Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 
08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: 
VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:string, s_date:date, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinFullOuterStringOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:string, 1:string, 2:date, 3:timestamp + smallTableValueMapping: 2:date, 3:timestamp + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:date, 3:timestamp + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: 
_col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 
18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string), s_date (type: date), 
s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:string, s_date:date, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinFullOuterStringOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:string, 1:string, 2:date, 3:timestamp + smallTableValueMapping: 2:date, 3:timestamp + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:date, 3:timestamp 
+ Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 
01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string), s_date (type: date), 
s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:string, s_date:date, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinFullOuterStringOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:string, 1:string, 2:date, 3:timestamp + smallTableValueMapping: 2:date, 3:timestamp + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:date, 3:timestamp 
+ Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL 
LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: 
[0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:smallint, 1:int + className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 2, 1 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:smallint, 1:int, 2:smallint, 3:int + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 
-706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 
1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:smallint, 1:int + className: VectorMapJoinFullOuterMultiKeyOperator + 
fullOuterSmallTableKeyMapping: 0 -> 2, 1 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:smallint, 1:int, 2:smallint, 3:int + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### 
+-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 
+ Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: 
key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:smallint, 1:int + className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 2, 1 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:smallint, 1:int, 2:smallint, 3:int + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 
-1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:smallint, 1:int + className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 2, 1 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:smallint, 1:int, 2:smallint, 3:int + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 
1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:ROW__ID:struct] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key0:timestamp, key1:smallint, key2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:s_decimal:decimal(38,18), 4:ROW__ID:struct] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:decimal(38,18) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2, 3] + dataColumns: key0:timestamp, key1:smallint, key2:string, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, KEY.reducesinkkey2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [timestamp, bigint, string, decimal(38,18)] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + 1 
KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:timestamp, 1:smallint, 2:string + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:timestamp, 1:smallint, 2:string + className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 3, 1 -> 4, 2 -> 5 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:timestamp, 1:smallint, 2:string, 3:timestamp, 4:smallint, 5:string, 6:decimal(38,18) + smallTableValueMapping: 6:decimal(38,18) + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 4 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:string, 3:timestamp, 4:smallint, 5:string, 6:decimal(38,18) + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 7 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, VALUE._col0:string, VALUE._col1:timestamp, VALUE._col2:smallint, VALUE._col3:string, VALUE._col4:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: 
Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 
THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 
06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 
09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL diff --git ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out new file mode 100644 index 0000000..4afac08 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out @@ -0,0 +1,5855 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: 
Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: Lineage: fullouter_long_small_1a.key SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a.s_date SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE 
fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.key SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.s_date SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic 
stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 
7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN 
fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL 
-2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: smallint), 
s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 
22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c 
+POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), s_decimal (type: 
decimal(38,18)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 
3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +PREHOOK: type: LOAD +#### A masked 
pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends 
on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL 
-932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: Lineage: fullouter_multikey_big_1a.key0 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a.key1 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM 
fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key0 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key1 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: Lineage: fullouter_multikey_small_1a.key0 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a.key1 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key0 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key1 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table 
fullouter_multikey_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 
Data size: 724 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 
-63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL 
-22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL 
NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 
-63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: 
default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: Lineage: fullouter_multikey_big_1b.key0 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key1 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key2 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: Lineage: fullouter_multikey_small_1b.key0 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key1 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key2 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.s_decimal SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:s_decimal, type:decimal(38,18), comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: 
default@fullouter_multikey_big_1b_txt +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: 
decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL 
NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 
15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 
67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: 
default@fullouter_string_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: Lineage: fullouter_string_big_1a.key SIMPLE [(fullouter_string_big_1a_txt)fullouter_string_big_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: Lineage: fullouter_string_big_1a_nonull.key SIMPLE [(fullouter_string_big_1a_nonull_txt)fullouter_string_big_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: CREATE TABLE 
fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: Lineage: fullouter_string_small_1a.key SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_date SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_timestamp SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.key SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_date SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_timestamp SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: analyze table fullouter_string_big_1a compute 
statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + 
limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 
13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 
10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL 
+WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY 
+PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A 
masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 
+NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 51 Data size: 3264 Basic 
stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 
214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: llap + 
Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 
8755921538765428593 1827-05-01 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: 
default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key 
+PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked 
pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 
193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + 
File Output Operator + compressed: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, 
_col2 + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 
11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value 
expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL 
+WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM 
fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic 
stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL 
ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce 
partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL 
-7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 
(type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 
1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 
+NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 90 Data size: 720 
Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 
-706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 
Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 
15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL 
NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 
749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL diff --git ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out new file mode 100644 index 0000000..4afac08 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out @@ -0,0 +1,5855 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: 
type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD 
+#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: Lineage: fullouter_long_small_1a.key SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a.s_date SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.key SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.s_date SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: 
analyze table fullouter_long_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM 
fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM 
fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 
2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL 
-5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 
3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: 
Output: default@fullouter_long_small_1b +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, 
_col2 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 
2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: analyze table fullouter_long_small_1c compute 
statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: 
_col0, _col1, _col2, _col3 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 
150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1d 
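For readability, here is a condensed sketch of the recurring single-key test pattern that the PREHOOK/POSTHOOK bookkeeping records next for the 1d table pair: stage the delimited text file, gather basic and column statistics, then EXPLAIN and run the FULL OUTER JOIN. Every statement is lifted from the surrounding golden output; the one assumption is the session setting shown first, inferred from the "enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]" lines in the plans for this (non-vectorized) variant.

-- assumed session setting for this variant, per the plan headers above and below
set hive.vectorized.execution.enabled=false;

CREATE TABLE fullouter_long_small_1d(key int)
row format delimited fields terminated by ',';
LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d;

-- basic and column statistics on both sides of the join
analyze table fullouter_long_big_1d compute statistics;
analyze table fullouter_long_big_1d compute statistics for columns;
analyze table fullouter_long_small_1d compute statistics;
analyze table fullouter_long_small_1d compute statistics for columns;

EXPLAIN VECTORIZATION DETAIL
SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key
order by b.key;

SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key
order by b.key;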
+PREHOOK: query: analyze table fullouter_long_big_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num 
rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: Lineage: fullouter_multikey_big_1a.key0 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a.key1 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key0 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key1 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: 
query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: Lineage: fullouter_multikey_small_1a.key0 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a.key1 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Lineage: 
fullouter_multikey_small_1a_nonull.key0 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key1 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +POSTHOOK: type: QUERY 
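Before the multi-key plans and results that follow, a condensed sketch of how the multi-key variant is staged: each table is first loaded as delimited text, rewritten as ORC via CTAS, and analyzed, and the join then matches on both key columns. All statements are taken from the surrounding golden output; the small side and the *_nonull variants are staged identically, and nothing new is assumed beyond omitting the test-harness bookkeeping.

CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int)
row format delimited fields terminated by ',';
LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt;
CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt;
analyze table fullouter_multikey_big_1a compute statistics;
analyze table fullouter_multikey_big_1a compute statistics for columns;

-- two-column join key; NULL key values on either side never match, so those rows
-- surface NULL-padded from both inputs in the results below
SELECT b.key0, b.key1, s.key0, s.key1
FROM fullouter_multikey_big_1a b
FULL OUTER JOIN fullouter_multikey_small_1a s
  ON b.key0 = s.key0 AND b.key1 = s.key1
order by b.key0, b.key1;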
+POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + 
expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 
+NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 
-276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 
23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 
-1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: Lineage: fullouter_multikey_big_1b.key0 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key1 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key2 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@fullouter_multikey_small_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: Lineage: fullouter_multikey_small_1b.key0 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key1 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key2 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.s_decimal SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:s_decimal, type:decimal(38,18), comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +#### 
A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 
(type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 
22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 
15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 
2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: Lineage: fullouter_string_big_1a.key SIMPLE [(fullouter_string_big_1a_txt)fullouter_string_big_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt 
+PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: Lineage: fullouter_string_big_1a_nonull.key SIMPLE [(fullouter_string_big_1a_nonull_txt)fullouter_string_big_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: Lineage: fullouter_string_small_1a.key SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_date SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_timestamp SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited 
fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.key SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_date SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_timestamp SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: 
default@fullouter_string_big_1a_nonull +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + 
outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 
2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 
15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key 
+PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 
+NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: 
all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 
3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, 
s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 
(type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 
06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + value 
expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 
-11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s 
ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL 
-1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + 
outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL 
+SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT 
b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + 
Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 
+NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator 
Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 
+NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 101 
Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 
+NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + 
File Output Operator + compressed: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL 
NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 99 Data size: 792 Basic stats: 
COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 
-1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 
+ Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 
30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: 
timestamp), _col1 (type: smallint) + sort order: ++ + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 
+NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 
07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 
14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL diff --git ql/src/test/results/clientpositive/llap/vector_inner_join.q.out ql/src/test/results/clientpositive/llap/vector_inner_join.q.out index bb555df..0b18b64 100644 --- ql/src/test/results/clientpositive/llap/vector_inner_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_inner_join.q.out @@ -85,10 +85,11 @@ STAGE PLANS: 1 _col0 (type: int) Map Join Vectorization: bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [0] projectedOutputColumnNums: [0] outputColumnNames: _col1 input vertices: @@ -251,11 +252,12 @@ STAGE PLANS: 1 _col0 (type: int) Map Join Vectorization: bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] + bigTableRetainColumnNums: [0] bigTableValueColumnNums: [0] className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] projectedOutputColumnNums: [0] outputColumnNames: _col0 input vertices: @@ -458,12 +460,13 @@ STAGE PLANS: 1 _col1 (type: int) Map Join Vectorization: bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] + bigTableRetainColumnNums: [] className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [0] 
projectedOutputColumnNums: [3, 0] - smallTableMapping: [3] + smallTableValueMapping: [3] outputColumnNames: _col1, _col2 input vertices: 1 Map 2 @@ -678,13 +681,14 @@ STAGE PLANS: 1 _col0 (type: int) Map Join Vectorization: bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] + bigTableRetainColumnNums: [0, 1] bigTableValueColumnNums: [0, 1] className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] projectedOutputColumnNums: [3, 0, 0, 1] - smallTableMapping: [3] + smallTableValueMapping: [3] outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 @@ -787,13 +791,14 @@ STAGE PLANS: 1 _col1 (type: int) Map Join Vectorization: bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] + bigTableRetainColumnNums: [0, 1] bigTableValueColumnNums: [0, 1] className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] projectedOutputColumnNums: [0, 1, 3, 0] - smallTableMapping: [3] + smallTableValueMapping: [3] outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 @@ -957,13 +962,14 @@ STAGE PLANS: 1 _col1 (type: int) Map Join Vectorization: bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] + bigTableRetainColumnNums: [0, 1] bigTableValueColumnNums: [0, 1] className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] projectedOutputColumnNums: [0, 1, 3] - smallTableMapping: [3] + smallTableValueMapping: [3] outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 @@ -1126,13 +1132,14 @@ STAGE PLANS: 1 _col1 (type: int) Map Join Vectorization: bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] + bigTableRetainColumnNums: [1] bigTableValueColumnNums: [1] className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [0] projectedOutputColumnNums: [1, 3, 0] - smallTableMapping: [3] + smallTableValueMapping: [3] outputColumnNames: _col1, _col2, _col3 input vertices: 1 Map 2 @@ -1347,13 +1354,14 @@ STAGE PLANS: 1 _col0 (type: int) Map Join Vectorization: bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] + bigTableRetainColumnNums: [0, 1] bigTableValueColumnNums: [0, 1] className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] projectedOutputColumnNums: [3, 0, 1] - smallTableMapping: [3] + smallTableValueMapping: [3] outputColumnNames: _col0, _col2, _col3 input vertices: 0 Map 1 @@ -1516,13 +1524,14 @@ STAGE PLANS: 1 _col0 (type: int) Map Join Vectorization: bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] + bigTableRetainColumnNums: [1] bigTableValueColumnNums: [1] className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [0] projectedOutputColumnNums: [3, 0, 1] - smallTableMapping: [3] + smallTableValueMapping: [3] outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 diff --git ql/src/test/results/clientpositive/llap/vector_join30.q.out ql/src/test/results/clientpositive/llap/vector_join30.q.out index 5fb8258..1985e1b 100644 --- ql/src/test/results/clientpositive/llap/vector_join30.q.out +++ ql/src/test/results/clientpositive/llap/vector_join30.q.out @@ -1006,6 +1006,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1236,6 +1239,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1466,6 +1472,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1696,6 +1705,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out index 821ea3a..1edee42 100644 --- ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out +++ ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out @@ -65,6 +65,7 @@ POSTHOOK: Output: default@tjoin1 POSTHOOK: Lineage: tjoin1.c1 SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: tjoin1.c2 EXPRESSION [(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ] POSTHOOK: Lineage: tjoin1.rnum SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, 
comment:null), ] +_col0 _col1 _col2 PREHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE PREHOOK: type: QUERY PREHOOK: Input: default@tjoin2stage @@ -76,12 +77,14 @@ POSTHOOK: Output: default@tjoin2 POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] -PREHOOK: query: explain vectorization expression +tjoin2stage.rnum tjoin2stage.c1 tjoin2stage.c2 +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] @@ -167,15 +170,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] @@ -261,15 +266,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -293,6 +300,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -350,6 +358,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + 
scratchColumnTypeNames: [string] Map 2 Map Operator Tree: TableScan @@ -357,6 +371,7 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 @@ -371,9 +386,15 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator + keyColumnNums: [1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true +<<<<<<< HEAD Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE +======= + valueColumnNums: [2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... more value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -386,6 +407,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -403,15 +430,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -435,6 +464,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -492,6 +522,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [string] Map 2 Map Operator Tree: TableScan @@ -499,6 +535,7 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 @@ -513,9 +550,15 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator + keyColumnNums: [1] native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true +<<<<<<< HEAD Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE +======= + valueColumnNums: [2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... more value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -528,6 +571,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -545,15 +594,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -577,6 +628,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -595,9 +647,19 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableFilterExpressions: FilterLongColGreaterLongScalar(col 2:int, val 15) + bigTableKeyColumnNums: [1] + bigTableRetainedColumnNums: [0, 1, 2] + bigTableValueColumnNums: [0, 1, 2] className: VectorMapJoinOuterLongOperator native: true +<<<<<<< HEAD nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true +======= + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumnNums: [0, 1, 2, 4] + smallTableMapping: [4] +>>>>>>> 3ddd925... 
more outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 @@ -631,6 +693,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [string] Map 2 Map Operator Tree: TableScan @@ -638,6 +706,7 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 @@ -652,9 +721,15 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator + keyColumnNums: [1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true +<<<<<<< HEAD Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE +======= + valueColumnNums: [2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... more value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -667,6 +742,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -684,15 +765,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -716,6 +799,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -734,9 +818,19 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableFilterExpressions: FilterLongColGreaterLongScalar(col 2:int, val 15) + bigTableKeyColumnNums: [1] + bigTableRetainedColumnNums: [0, 1, 2] + bigTableValueColumnNums: [0, 1, 2] className: VectorMapJoinOuterLongOperator native: true +<<<<<<< HEAD nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, 
Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true +======= + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumnNums: [0, 1, 2, 4] + smallTableMapping: [4] +>>>>>>> 3ddd925... more outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 @@ -770,6 +864,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [string] Map 2 Map Operator Tree: TableScan @@ -777,6 +877,7 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 @@ -791,9 +892,15 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator + keyColumnNums: [1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true +<<<<<<< HEAD Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE +======= + valueColumnNums: [2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... 
more value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -806,6 +913,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -823,6 +936,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL diff --git ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out index 17704e5..f065f2e 100644 --- ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out @@ -5125,6 +5125,9 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -5272,6 +5275,9 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -5419,6 +5425,9 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -5568,6 +5577,9 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -9119,6 +9131,9 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -9369,6 +9384,9 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -9619,6 +9637,9 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -9871,6 +9892,9 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -13649,6 +13673,9 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + MergeJoin 
Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -13899,6 +13926,9 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -14149,6 +14179,9 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -14401,6 +14434,9 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -18182,6 +18218,9 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -18432,6 +18471,9 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -18682,6 +18724,9 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -18934,6 +18979,9 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out index 50e6a85..14528bb 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out @@ -115,7 +115,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true projectedOutputColumnNums: [0, 1, 3, 4] - smallTableMapping: [4] + smallTableValueMapping: [4] outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 @@ -313,7 +313,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true projectedOutputColumnNums: [3, 4, 0, 1] - smallTableMapping: [3] + smallTableValueMapping: [3] outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 diff --git ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out index 4901e83..f61b472 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out @@ -275,7 +275,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] - smallTableMapping: [13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24] + smallTableValueMapping: [13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 input vertices: 1 Map 2 @@ -425,16 +425,15 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE +<<<<<<< HEAD TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] +======= +>>>>>>> ec1dd89... more Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -442,6 +441,7 @@ STAGE PLANS: keys: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) +<<<<<<< HEAD Map Join Vectorization: bigTableKeyColumnNums: [0] bigTableRetainedColumnNums: [0] @@ -450,37 +450,27 @@ STAGE PLANS: native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true projectedOutputColumnNums: [0] +======= +>>>>>>> 3ddd925... 
more outputColumnNames: _col0 input vertices: 1 Map 2 Statistics: Num rows: 225 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 225 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] + notVectorizedReason: MAPJOIN operator: Error encountered calling constructor VectorMapJoinOuterLongOperator(CompilationOpContext, OperatorDesc, VectorizationContext, VectorDesc) + vectorized: false Map 2 Map Operator Tree: TableScan @@ -691,16 +681,15 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 15 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE +<<<<<<< HEAD TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] +======= +>>>>>>> ec1dd89... more Select Operator expressions: ctinyint (type: tinyint), cint (type: int) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] Statistics: Num rows: 15 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -708,6 +697,7 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) +<<<<<<< HEAD Map Join Vectorization: bigTableKeyColumnNums: [2] bigTableRetainedColumnNums: [0] @@ -716,6 +706,8 @@ STAGE PLANS: native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true projectedOutputColumnNums: [0] +======= +>>>>>>> 3ddd925... 
more outputColumnNames: _col0 input vertices: 1 Map 3 @@ -726,6 +718,7 @@ STAGE PLANS: keys: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) +<<<<<<< HEAD Map Join Vectorization: bigTableKeyColumnNums: [0] bigTableRetainedColumnNums: [0] @@ -734,49 +727,29 @@ STAGE PLANS: native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true projectedOutputColumnNums: [0] +======= +>>>>>>> 3ddd925... more outputColumnNames: _col0 input vertices: 1 Map 4 Statistics: Num rows: 420 Data size: 1664 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), sum(_col0) - Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFSumLong(col 0:tinyint) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0, 2] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] + notVectorizedReason: MAPJOIN operator: Error encountered calling constructor VectorMapJoinOuterLongOperator(CompilationOpContext, OperatorDesc, VectorizationContext, VectorDesc) + vectorized: false Map 3 Map Operator Tree: TableScan diff --git ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out index a841d4c..6bb5ad8 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out @@ -266,16 +266,15 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 20 Data size: 132 Basic stats: COMPLETE Column stats: COMPLETE +<<<<<<< HEAD TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 
12:ROW__ID:struct] +======= +>>>>>>> ec1dd89... more Select Operator expressions: cint (type: int), cbigint (type: bigint) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [2, 3] Statistics: Num rows: 20 Data size: 132 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -283,6 +282,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) +<<<<<<< HEAD Map Join Vectorization: bigTableKeyColumnNums: [2] bigTableRetainedColumnNums: [3] @@ -291,6 +291,8 @@ STAGE PLANS: native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true projectedOutputColumnNums: [3] +======= +>>>>>>> 3ddd925... more outputColumnNames: _col1 input vertices: 1 Map 3 @@ -301,6 +303,7 @@ STAGE PLANS: keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) +<<<<<<< HEAD Map Join Vectorization: bigTableKeyColumnNums: [3] bigTableRetainedColumnNums: [3] @@ -309,49 +312,29 @@ STAGE PLANS: native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true projectedOutputColumnNums: [3] +======= +>>>>>>> 3ddd925... more outputColumnNames: _col1 input vertices: 1 Map 4 Statistics: Num rows: 142 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), sum(_col1) - Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFSumLong(col 3:bigint) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) - Execution mode: vectorized, llap + Execution mode: llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [2, 3] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] + 
notVectorizedReason: MAPJOIN operator: Error encountered calling constructor VectorMapJoinOuterLongOperator(CompilationOpContext, OperatorDesc, VectorizationContext, VectorDesc) + vectorized: false Map 3 Map Operator Tree: TableScan diff --git ql/src/test/results/clientpositive/llap/vectorized_join46.q.out ql/src/test/results/clientpositive/llap/vectorized_join46.q.out index a134b19..c95e03b 100644 --- ql/src/test/results/clientpositive/llap/vectorized_join46.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_join46.q.out @@ -16,9 +16,16 @@ POSTHOOK: query: INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test1 +<<<<<<< HEAD POSTHOOK: Lineage: test1.col_1 SCRIPT [] POSTHOOK: Lineage: test1.key SCRIPT [] POSTHOOK: Lineage: test1.value SCRIPT [] +======= +POSTHOOK: Lineage: test1.col_1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: test1.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: test1.value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 _col2 +>>>>>>> 3ddd925... more PREHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -37,19 +44,32 @@ POSTHOOK: query: INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'), POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test2 +<<<<<<< HEAD POSTHOOK: Lineage: test2.col_2 SCRIPT [] POSTHOOK: Lineage: test2.key SCRIPT [] POSTHOOK: Lineage: test2.value SCRIPT [] PREHOOK: query: EXPLAIN +======= +POSTHOOK: Lineage: test2.col_2 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: test2.key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: test2.value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +>>>>>>> 3ddd925... 
more SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -66,47 +86,136 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 +<<<<<<< HEAD Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... more Map Join Operator condition map: Left Outer Join 0 to 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumnNums: [1] + bigTableOuterKeyMapping: 1 -> 5 + bigTableRetainedColumnNums: [0, 1, 2, 5] + bigTableValueColumnNums: [0, 1, 2] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 6] + smallTableValueMapping: [4, 6] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false +<<<<<<< HEAD Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE +======= + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... 
more table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] Map 2 Map Operator Tree: TableScan alias: test2 +<<<<<<< HEAD Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... more Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) +<<<<<<< HEAD Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE +======= + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 2] + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... 
more value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -128,6 +237,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob NULL NULL NULL 101 2 Car 102 2 Del 101 2 Car 103 2 Ema @@ -136,20 +246,25 @@ POSTHOOK: Input: default@test2 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema NULL NULL None NULL NULL NULL -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value AND test1.key between 100 and 102 AND test2.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value AND test1.key between 100 and 102 AND test2.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -166,11 +281,26 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 +<<<<<<< HEAD Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... 
more Map Join Operator condition map: Left Outer Join 0 to 1 @@ -180,39 +310,118 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + bigTableFilterExpressions: FilterLongColumnBetween(col 0:int, left 100, right 102) + bigTableKeyColumnNums: [1] + bigTableOuterKeyMapping: 1 -> 5 + bigTableRetainedColumnNums: [0, 1, 2, 5] + bigTableValueColumnNums: [0, 1, 2] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 6] + smallTableValueMapping: [4, 6] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 Statistics: Num rows: 6 Data size: 857 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false +<<<<<<< HEAD Statistics: Num rows: 6 Data size: 857 Basic stats: COMPLETE Column stats: COMPLETE +======= + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... more table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] Map 2 Map Operator Tree: TableScan alias: test2 +<<<<<<< HEAD Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] +>>>>>>> 3ddd925... more Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColumnBetween(col 0:int, left 100, right 102) predicate: key BETWEEN 100 AND 102 (type: boolean) Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 +<<<<<<< HEAD Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE +======= + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... 
more Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) +<<<<<<< HEAD Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE +======= + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 2] + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... more value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -238,6 +447,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob NULL NULL NULL 101 2 Car 102 2 Del 98 NULL None NULL NULL NULL @@ -245,18 +455,23 @@ POSTHOOK: Input: default@test2 99 2 Mat NULL NULL NULL NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Map 1' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.key between 100 and 102 AND test2.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.key between 100 and 102 AND test2.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -273,11 +488,26 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 +<<<<<<< HEAD Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... 
more Map Join Operator condition map: Left Outer Join 0 to 1 @@ -287,37 +517,113 @@ STAGE PLANS: keys: 0 1 + Map Join Vectorization: + bigTableFilterExpressions: FilterLongColumnBetween(col 0:int, left 100, right 102) + bigTableRetainedColumnNums: [0, 1, 2] + bigTableValueColumnNums: [0, 1, 2] + className: VectorMapJoinOuterMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 6] + smallTableValueMapping: [4, 5, 6] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 Statistics: Num rows: 6 Data size: 1142 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false +<<<<<<< HEAD Statistics: Num rows: 6 Data size: 1142 Basic stats: COMPLETE Column stats: COMPLETE +======= + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 2310 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... more table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] Map 2 Map Operator Tree: TableScan alias: test2 +<<<<<<< HEAD Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] +>>>>>>> 3ddd925... 
more Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColumnBetween(col 0:int, left 100, right 102) predicate: key BETWEEN 100 AND 102 (type: boolean) Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 +<<<<<<< HEAD Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE +======= + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1, 2] + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... more value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -342,22 +648,28 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 101 2 Car 102 2 Del 98 NULL None NULL NULL NULL 99 0 Alice NULL NULL NULL 99 2 Mat NULL NULL NULL NULL NULL None NULL NULL NULL -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value AND true) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value AND true) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -374,47 +686,136 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 +<<<<<<< HEAD Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + 
vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... more Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) +<<<<<<< HEAD Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +======= + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 2] + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... more value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan alias: test2 +<<<<<<< HEAD Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... 
more Map Join Operator condition map: Right Outer Join 0 to 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumnNums: [1] + bigTableOuterKeyMapping: 1 -> 5 + bigTableRetainedColumnNums: [0, 1, 2, 5] + bigTableValueColumnNums: [0, 1, 2] + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumnNums: [4, 5, 6, 0, 1, 2] + smallTableValueMapping: [4, 6] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 0 Map 1 Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false +<<<<<<< HEAD Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE +======= + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 1267 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... more table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] Stage: Stage-0 Fetch Operator @@ -436,6 +837,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 101 2 Car 102 2 Del 101 2 Car 103 2 Ema 99 2 Mat 102 2 Del @@ -443,16 +845,21 @@ POSTHOOK: Input: default@test2 NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 1' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -469,11 +876,26 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 +<<<<<<< HEAD Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... more Map Join Operator condition map: Left Outer Join 0 to 1 @@ -483,23 +905,56 @@ STAGE PLANS: keys: 0 1 + Map Join Vectorization: + bigTableFilterExpressions: FilterLongColumnBetween(col 0:int, left 100, right 102) + bigTableRetainedColumnNums: [0, 1, 2] + bigTableValueColumnNums: [0, 1, 2] + className: VectorMapJoinOuterMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + projectedOutputColumnNums: [0, 1, 2, 4, 5, 6] + smallTableValueMapping: [4, 5, 6] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false +<<<<<<< HEAD Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE +======= + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 24 Data size: 9240 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... 
more table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] Map 2 Map Operator Tree: TableScan alias: test2 +<<<<<<< HEAD Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) @@ -508,9 +963,48 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1, 2] + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... more value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -533,6 +1027,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -546,20 +1041,25 @@ POSTHOOK: Input: default@test2 99 2 Mat NULL NULL NULL NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 1' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102 OR test2.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102 OR test2.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -601,10 +1101,17 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Unknown + vectorized: false Map 2 Map Operator Tree: TableScan alias: test2 +<<<<<<< HEAD Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) @@ -613,9 +1120,48 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1, 2] + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... 
more value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -642,6 +1188,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -656,18 +1203,23 @@ POSTHOOK: Input: default@test2 99 2 Mat 103 2 Ema NULL NULL None 102 2 Del Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -709,10 +1261,17 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Unknown + vectorized: false Map 2 Map Operator Tree: TableScan alias: test2 +<<<<<<< HEAD Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) @@ -721,9 +1280,48 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1, 2] + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE 
Column stats: NONE +>>>>>>> 3ddd925... more value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -748,6 +1346,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -762,18 +1361,23 @@ POSTHOOK: Input: default@test2 99 2 Mat 103 2 Ema NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value OR test2.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value OR test2.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -815,10 +1419,17 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Unknown + vectorized: false Map 2 Map Operator Tree: TableScan alias: test2 +<<<<<<< HEAD Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) @@ -827,9 +1438,48 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1, 2] + Statistics: Num 
rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... more value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -854,6 +1504,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 101 2 Car 102 2 Del 101 2 Car 103 2 Ema @@ -862,20 +1513,25 @@ POSTHOOK: Input: default@test2 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema NULL NULL None 102 2 Del -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value AND (test1.key between 100 and 102 OR test2.key between 100 and 102)) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value AND (test1.key between 100 and 102 OR test2.key between 100 and 102)) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -917,23 +1573,70 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Unknown + vectorized: false Map 2 Map Operator Tree: TableScan alias: test2 +<<<<<<< HEAD Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... 
more Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) +<<<<<<< HEAD Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE +======= + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 2] + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... more value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -959,6 +1662,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob NULL NULL NULL 101 2 Car 102 2 Del 101 2 Car 103 2 Ema @@ -967,20 +1671,25 @@ POSTHOOK: Input: default@test2 99 2 Mat 102 2 Del NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 2' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102 OR test2.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102 OR test2.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -997,6 +1706,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 +<<<<<<< HEAD Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) @@ -1005,9 +1715,48 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... 
more value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -1037,6 +1786,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Unknown + vectorized: false Stage: Stage-0 Fetch Operator @@ -1063,6 +1818,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -1077,18 +1833,23 @@ POSTHOOK: Input: default@test2 99 2 Mat 103 2 Ema NULL NULL None 102 2 Del Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 2' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1105,6 +1866,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 +<<<<<<< HEAD Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) @@ -1113,9 +1875,48 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 
1, 2] + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... more value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -1145,6 +1946,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Unknown + vectorized: false Stage: Stage-0 Fetch Operator @@ -1169,6 +1976,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -1180,18 +1988,23 @@ POSTHOOK: Input: default@test2 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 2' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value OR test2.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value OR test2.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1208,6 +2021,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 +<<<<<<< HEAD Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) @@ -1216,9 +2030,48 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns 
IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... more value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -1248,6 +2101,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Unknown + vectorized: false Stage: Stage-0 Fetch Operator @@ -1272,6 +2131,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 101 2 Car 102 2 Del 101 2 Car 103 2 Ema @@ -1282,20 +2142,25 @@ POSTHOOK: Input: default@test2 NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None NULL NULL None 102 2 Del -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value AND (test1.key between 100 and 102 OR test2.key between 100 and 102)) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value AND (test1.key between 100 and 102 OR test2.key between 100 and 102)) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1312,19 +2177,60 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 +<<<<<<< HEAD Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... 
more Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) +<<<<<<< HEAD Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +======= + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 2] + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... more value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -1354,6 +2260,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Unknown + vectorized: false Stage: Stage-0 Fetch Operator @@ -1379,26 +2291,32 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 101 2 Car 102 2 Del 101 2 Car 103 2 Ema 99 2 Mat 102 2 Del NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102 OR test2.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102 OR test2.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1415,6 +2333,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 +<<<<<<< HEAD Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) @@ -1423,13 +2342,53 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] 
+ Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... more value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan alias: test2 +<<<<<<< HEAD Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) @@ -1438,9 +2397,48 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1, 2] + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... 
more value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1460,6 +2458,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -1486,6 +2487,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -1500,18 +2502,23 @@ POSTHOOK: Input: default@test2 99 2 Mat 103 2 Ema NULL NULL None 102 2 Del Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1528,6 +2535,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 +<<<<<<< HEAD Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) @@ -1536,13 +2544,53 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS 
true + valueColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... more value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan alias: test2 +<<<<<<< HEAD Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) @@ -1551,9 +2599,48 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1, 2] + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... 
more value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1573,6 +2660,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -1597,6 +2687,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -1611,18 +2702,23 @@ POSTHOOK: Input: default@test2 99 2 Mat 103 2 Ema NULL NULL None NULL NULL NULL Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test2.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test2.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1639,6 +2735,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 +<<<<<<< HEAD Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) @@ -1647,13 +2744,53 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values 
IS true + valueColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... more value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan alias: test2 +<<<<<<< HEAD Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) @@ -1662,9 +2799,48 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1, 2] + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... 
more value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1684,6 +2860,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -1708,6 +2887,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 101 2 Car 102 2 Del 101 2 Car 103 2 Ema @@ -1718,20 +2898,25 @@ POSTHOOK: Input: default@test2 NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None NULL NULL None 102 2 Del -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value AND (test1.key between 100 and 102 OR test2.key between 100 and 102)) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value AND (test1.key between 100 and 102 OR test2.key between 100 and 102)) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1748,36 +2933,118 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 +<<<<<<< HEAD Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... 
more Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) +<<<<<<< HEAD Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +======= + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 2] + Statistics: Num rows: 6 Data size: 1152 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... more value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan alias: test2 +<<<<<<< HEAD Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE +======= + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... more Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) +<<<<<<< HEAD Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE +======= + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 2] + Statistics: Num rows: 4 Data size: 768 Basic stats: COMPLETE Column stats: NONE +>>>>>>> 3ddd925... 
more value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1797,6 +3064,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -1822,6 +3092,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob NULL NULL NULL 101 2 Car 102 2 Del 101 2 Car 103 2 Ema diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java index 5e25c47..953604c 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java @@ -181,7 +181,9 @@ public void setVal(int elementNum, byte[] sourceBuf, int start, int length) { if ((nextFree + length) > buffer.length) { increaseBufferSpace(length); } - System.arraycopy(sourceBuf, start, buffer, nextFree, length); + if (length > 0) { + System.arraycopy(sourceBuf, start, buffer, nextFree, length); + } vector[elementNum] = buffer; this.start[elementNum] = nextFree; this.length[elementNum] = length;
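The storage-api hunk above wraps the byte copy in BytesColumnVector.setVal in a length check. A plausible reading (the patch itself carries no commentary) is that zero-length values, such as empty or NULL strings produced on the non-matching side of a FULL OUTER join, can arrive with a null or otherwise unusable source buffer, and java.lang.System.arraycopy rejects a null source even when zero bytes would be copied. The sketch below is illustrative only: the class name ZeroLengthCopySketch, its buffer-growth logic, and the main method are invented for the example and are not Hive code; only the "skip the copy when length is 0" guard mirrors the hunk.

```java
import java.util.Arrays;

/**
 * Minimal sketch (not the Hive class) of why a setVal-style method guards
 * System.arraycopy with a length check: a zero-length value may be passed
 * with a null source buffer, and System.arraycopy throws
 * NullPointerException on a null source even when no bytes would be copied.
 */
public class ZeroLengthCopySketch {

  private byte[] buffer = new byte[16];
  private int nextFree = 0;

  /** Appends length bytes from sourceBuf into the shared buffer, skipping empty values. */
  void setVal(byte[] sourceBuf, int start, int length) {
    // Grow the shared buffer if the incoming bytes do not fit.
    if (nextFree + length > buffer.length) {
      buffer = Arrays.copyOf(buffer, Math.max(buffer.length * 2, nextFree + length));
    }
    // Guard: a zero-length value (possibly backed by a null sourceBuf) must not
    // reach System.arraycopy, which rejects a null source unconditionally.
    if (length > 0) {
      System.arraycopy(sourceBuf, start, buffer, nextFree, length);
    }
    nextFree += length;
  }

  public static void main(String[] args) {
    ZeroLengthCopySketch sketch = new ZeroLengthCopySketch();
    sketch.setVal("abc".getBytes(), 0, 3); // normal copy
    sketch.setVal(null, 0, 0);             // empty/NULL value: no copy, no NPE
    System.out.println("bytes stored = " + sketch.nextFree);
  }
}
```

Without the guard, the second call in main would throw NullPointerException inside System.arraycopy; with it, empty values simply record a length of zero, which is the behavior the guarded copy in the hunk appears to be after.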