diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java index 263770e877..89db530f54 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/OpTraitsRulesProcFactory.java @@ -308,44 +308,40 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, public static class SelectRule implements NodeProcessor { - boolean processSortCols = false; - // For bucket columns // If all the columns match to the parent, put them in the bucket cols // else, add empty list. // For sort columns // Keep the subset of all the columns as long as order is maintained. public List> getConvertedColNames( - List> parentColNames, SelectOperator selOp) { + List> parentColNames, SelectOperator selOp, boolean processSortCols) { List> listBucketCols = new ArrayList<>(); - if (selOp.getColumnExprMap() != null) { - if (parentColNames != null) { - for (List colNames : parentColNames) { - List bucketColNames = new ArrayList<>(); - boolean found = false; - for (String colName : colNames) { - for (Entry entry : selOp.getColumnExprMap().entrySet()) { - if ((entry.getValue() instanceof ExprNodeColumnDesc) && - (((ExprNodeColumnDesc) (entry.getValue())).getColumn().equals(colName))) { - bucketColNames.add(entry.getKey()); - found = true; - break; - } - } - if (!found) { - // Bail out on first missed column. - break; - } - } - if (!processSortCols && !found) { - // While processing bucket columns, atleast one bucket column - // missed. This results in a different bucketing scheme. - // Add empty list - listBucketCols.add(new ArrayList<>()); - } else { - listBucketCols.add(bucketColNames); + for (List colNames : parentColNames) { + List bucketColNames = new ArrayList<>(); + boolean found = false; + for (String colName : colNames) { + // Reset found + found = false; + for (Entry entry : selOp.getColumnExprMap().entrySet()) { + if ((entry.getValue() instanceof ExprNodeColumnDesc) && + (((ExprNodeColumnDesc) (entry.getValue())).getColumn().equals(colName))) { + bucketColNames.add(entry.getKey()); + found = true; + break; } } + if (!found) { + // Bail out on first missed column. + break; + } + } + if (!processSortCols && !found) { + // While processing bucket columns, atleast one bucket column + // missed. This results in a different bucketing scheme. + // Add empty list + listBucketCols.add(new ArrayList<>()); + } else { + listBucketCols.add(bucketColNames); } } @@ -363,13 +359,12 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, List> listSortCols = null; if (selOp.getColumnExprMap() != null) { if (parentBucketColNames != null) { - listBucketCols = getConvertedColNames(parentBucketColNames, selOp); + listBucketCols = getConvertedColNames(parentBucketColNames, selOp, false); } List> parentSortColNames = selOp.getParentOperators().get(0).getOpTraits().getSortCols(); if (parentSortColNames != null) { - processSortCols = true; - listSortCols = getConvertedColNames(parentSortColNames, selOp); + listSortCols = getConvertedColNames(parentSortColNames, selOp, true); } } diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out index c16b14342a..cdd8f64634 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -164,14 +164,14 @@ where src.key not in ( select key from src s1 where s1.key > '2') order by key PREHOOK: type: QUERY PREHOOK: Input: default@src -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select * from src where src.key not in ( select key from src s1 where s1.key > '2') order by key POSTHOOK: type: QUERY POSTHOOK: Input: default@src -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 0 val_0 0 val_0 0 val_0 @@ -547,7 +547,7 @@ where b.p_name not in order by p_mfgr, b.p_name PREHOOK: type: QUERY PREHOOK: Input: default@part -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select p_mfgr, b.p_name, p_size from part b where b.p_name not in @@ -558,7 +558,7 @@ where b.p_name not in order by p_mfgr, b.p_name POSTHOOK: type: QUERY POSTHOOK: Input: default@part -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### Manufacturer#1 almond antique chartreuse lavender yellow 34 Manufacturer#1 almond antique salmon chartreuse burlywood 6 Manufacturer#1 almond aquamarine burnished black steel 28 @@ -818,7 +818,7 @@ part where part.p_size not in order by p_name, p_size PREHOOK: type: QUERY PREHOOK: Input: default@part -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select p_name, p_size from part where part.p_size not in @@ -829,7 +829,7 @@ part where part.p_size not in order by p_name, p_size POSTHOOK: type: QUERY POSTHOOK: Input: default@part -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### almond antique blue firebrick mint 31 almond antique burnished rose metallic 2 almond antique burnished rose metallic 2 @@ -1191,7 +1191,7 @@ from part b where b.p_size not in ) PREHOOK: type: QUERY PREHOOK: Input: default@part -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select p_mfgr, p_name, p_size from part b where b.p_size not in (select min(p_size) @@ -1200,7 +1200,7 @@ from part b where b.p_size not in ) POSTHOOK: type: QUERY POSTHOOK: Input: default@part -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### Manufacturer#1 almond antique salmon chartreuse burlywood 6 Manufacturer#1 almond aquamarine burnished black steel 28 Manufacturer#1 almond antique chartreuse lavender yellow 34 @@ -1229,7 +1229,7 @@ where li.l_linenumber = 1 and group by li.l_partkey PREHOOK: type: QUERY PREHOOK: Input: default@lineitem -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select li.l_partkey, count(*) from lineitem li where li.l_linenumber = 1 and @@ -1237,7 +1237,7 @@ where li.l_linenumber = 1 and group by li.l_partkey POSTHOOK: type: QUERY POSTHOOK: Input: default@lineitem -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 22630 1 119477 1 450 1 @@ -1261,14 +1261,14 @@ where not src.key in ( select key from src s1 where s1.key > '2') order by key PREHOOK: type: QUERY PREHOOK: Input: default@src -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select * from src where not src.key in ( select key from src s1 where s1.key > '2') order by key POSTHOOK: type: QUERY POSTHOOK: Input: default@src -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 0 val_0 0 val_0 0 val_0 @@ -1577,14 +1577,14 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@t1_v PREHOOK: Input: default@t2_v -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select * from T1_v where T1_v.key not in (select T2_v.key from T2_v) POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@t1_v POSTHOOK: Input: default@t2_v -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: explain select * from part where p_brand <> 'Brand#14' AND p_size NOT IN (select (p_size*p_size) from part p where p.p_type = part.p_type ) AND p_size <> 340 PREHOOK: type: QUERY POSTHOOK: query: explain select * from part where p_brand <> 'Brand#14' AND p_size NOT IN (select (p_size*p_size) from part p where p.p_type = part.p_type ) AND p_size <> 340 @@ -1754,11 +1754,11 @@ STAGE PLANS: PREHOOK: query: select * from part where p_brand <> 'Brand#14' AND p_size NOT IN (select (p_size*p_size) from part p where p.p_type = part.p_type ) AND p_size <> 340 PREHOOK: type: QUERY PREHOOK: Input: default@part -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select * from part where p_brand <> 'Brand#14' AND p_size NOT IN (select (p_size*p_size) from part p where p.p_type = part.p_type ) AND p_size <> 340 POSTHOOK: type: QUERY POSTHOOK: Input: default@part -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve 15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu 191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle @@ -1969,11 +1969,11 @@ Warning: Shuffle Join MERGEJOIN[38][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reduc PREHOOK: query: select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_partkey PREHOOK: type: QUERY PREHOOK: Input: default@part -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_partkey POSTHOOK: type: QUERY POSTHOOK: Input: default@part -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the 33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful 40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s @@ -2164,11 +2164,11 @@ Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reduc PREHOOK: query: select * from part where (p_partkey*p_size) NOT IN (select min(p_partkey) from part group by p_type) PREHOOK: type: QUERY PREHOOK: Input: default@part -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select * from part where (p_partkey*p_size) NOT IN (select min(p_partkey) from part group by p_type) POSTHOOK: type: QUERY POSTHOOK: Input: default@part -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve 17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the 121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h @@ -2421,11 +2421,11 @@ STAGE PLANS: PREHOOK: query: select count(*) as c from part as e where p_size + 100 NOT IN (select p_partkey from part where p_name = e.p_name) PREHOOK: type: QUERY PREHOOK: Input: default@part -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select count(*) as c from part as e where p_size + 100 NOT IN (select p_partkey from part where p_name = e.p_name) POSTHOOK: type: QUERY POSTHOOK: Input: default@part -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 26 Warning: Shuffle Join MERGEJOIN[35][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from part where floor(p_retailprice) NOT IN (select floor(min(p_retailprice)) from part group by p_type) @@ -2601,11 +2601,11 @@ Warning: Shuffle Join MERGEJOIN[35][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reduc PREHOOK: query: select * from part where floor(p_retailprice) NOT IN (select floor(min(p_retailprice)) from part group by p_type) PREHOOK: type: QUERY PREHOOK: Input: default@part -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select * from part where floor(p_retailprice) NOT IN (select floor(min(p_retailprice)) from part group by p_type) POSTHOOK: type: QUERY POSTHOOK: Input: default@part -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir PREHOOK: query: explain select * from part where p_name NOT IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ) PREHOOK: type: QUERY @@ -2848,11 +2848,11 @@ STAGE PLANS: PREHOOK: query: select * from part where p_name NOT IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ) PREHOOK: type: QUERY PREHOOK: Input: default@part -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select * from part where p_name NOT IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey ) POSTHOOK: type: QUERY POSTHOOK: Input: default@part -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull @@ -3034,11 +3034,11 @@ STAGE PLANS: PREHOOK: query: select * from part where p_name NOT IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ) PREHOOK: type: QUERY PREHOOK: Input: default@part -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select * from part where p_name NOT IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey ) POSTHOOK: type: QUERY POSTHOOK: Input: default@part -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: explain select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type NOT IN (select p_type+2 from part where part.p_brand = fpart.brand) PREHOOK: type: QUERY POSTHOOK: query: explain select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type NOT IN (select p_type+2 from part where part.p_brand = fpart.brand) @@ -3204,11 +3204,11 @@ STAGE PLANS: PREHOOK: query: select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type NOT IN (select p_type+2 from part where part.p_brand = fpart.brand) PREHOOK: type: QUERY PREHOOK: Input: default@part -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type NOT IN (select p_type+2 from part where part.p_brand = fpart.brand) POSTHOOK: type: QUERY POSTHOOK: Input: default@part -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: explain select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type NOT IN (select p_type from part where (part.p_size+1) = fpart.size) PREHOOK: type: QUERY POSTHOOK: query: explain select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type NOT IN (select p_type from part where (part.p_size+1) = fpart.size) @@ -3454,11 +3454,11 @@ STAGE PLANS: PREHOOK: query: select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type NOT IN (select p_type from part where (part.p_size+1) = fpart.size+1) PREHOOK: type: QUERY PREHOOK: Input: default@part -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type NOT IN (select p_type from part where (part.p_size+1) = fpart.size+1) POSTHOOK: type: QUERY POSTHOOK: Input: default@part -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### almond aquamarine yellow dodger mint almond aquamarine dodger light gainsboro almond antique misty red olive @@ -3724,11 +3724,11 @@ Warning: Shuffle Join MERGEJOIN[58][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reduc PREHOOK: query: select key, count(*) from src where value NOT IN (select key from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY PREHOOK: Input: default@src -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select key, count(*) from src where value NOT IN (select key from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) POSTHOOK: type: QUERY POSTHOOK: Input: default@src -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 438 3 417 3 403 3 @@ -4016,11 +4016,11 @@ STAGE PLANS: PREHOOK: query: select key, count(*) from src where value NOT IN (select concat('v', value) from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY PREHOOK: Input: default@src -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select key, count(*) from src where value NOT IN (select concat('v', value) from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) POSTHOOK: type: QUERY POSTHOOK: Input: default@src -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 438 3 417 3 403 3 @@ -4239,11 +4239,11 @@ Warning: Shuffle Join MERGEJOIN[38][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reduc PREHOOK: query: select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand PREHOOK: type: QUERY PREHOOK: Input: default@part -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand POSTHOOK: type: QUERY POSTHOOK: Input: default@part -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously @@ -4451,11 +4451,11 @@ Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reduc PREHOOK: query: select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand, p_partkey limit 4 PREHOOK: type: QUERY PREHOOK: Input: default@part -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select * from part where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand, p_partkey limit 4 POSTHOOK: type: QUERY POSTHOOK: Input: default@part -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously @@ -4669,12 +4669,12 @@ PREHOOK: query: select * from src where key NOT IN (select p_name from part UNIO PREHOOK: type: QUERY PREHOOK: Input: default@part PREHOOK: Input: default@src -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select * from src where key NOT IN (select p_name from part UNION ALL select p_brand from part) POSTHOOK: type: QUERY POSTHOOK: Input: default@part POSTHOOK: Input: default@src -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 0 val_0 0 val_0 0 val_0 @@ -5402,11 +5402,11 @@ STAGE PLANS: PREHOOK: query: select count(*) as c from part as e where p_size + 100 not in ( select p_type from part where p_brand = e.p_brand) PREHOOK: type: QUERY PREHOOK: Input: default@part -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select count(*) as c from part as e where p_size + 100 not in ( select p_type from part where p_brand = e.p_brand) POSTHOOK: type: QUERY POSTHOOK: Input: default@part -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 26 PREHOOK: query: CREATE TABLE t1_n0 (c1 INT, c2 CHAR(100)) PREHOOK: type: CREATETABLE @@ -5592,12 +5592,12 @@ PREHOOK: query: SELECT c1 FROM t1_n0 WHERE c1 NOT IN (SELECT c1 FROM t2_n0) PREHOOK: type: QUERY PREHOOK: Input: default@t1_n0 PREHOOK: Input: default@t2_n0 -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT c1 FROM t1_n0 WHERE c1 NOT IN (SELECT c1 FROM t2_n0) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n0 POSTHOOK: Input: default@t2_n0 -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: explain SELECT c1 FROM t1_n0 WHERE c1 NOT IN (SELECT c1 FROM t2_n0 where t1_n0.c2=t2_n0.c1) PREHOOK: type: QUERY POSTHOOK: query: explain SELECT c1 FROM t1_n0 WHERE c1 NOT IN (SELECT c1 FROM t2_n0 where t1_n0.c2=t2_n0.c1) @@ -5811,12 +5811,12 @@ PREHOOK: query: SELECT c1 FROM t1_n0 WHERE c1 NOT IN (SELECT c1 FROM t2_n0 where PREHOOK: type: QUERY PREHOOK: Input: default@t1_n0 PREHOOK: Input: default@t2_n0 -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: SELECT c1 FROM t1_n0 WHERE c1 NOT IN (SELECT c1 FROM t2_n0 where t1_n0.c2=t2_n0.c1) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n0 POSTHOOK: Input: default@t2_n0 -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### NULL 1 2 @@ -6031,12 +6031,12 @@ PREHOOK: query: select t1_n0.a from t1_n0 where t1_n0.b NOT IN (select t2_n0.a f PREHOOK: type: QUERY PREHOOK: Input: default@t1_n0 PREHOOK: Input: default@t2_n0 -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select t1_n0.a from t1_n0 where t1_n0.b NOT IN (select t2_n0.a from t2_n0 where t2_n0.b=t1_n0.a) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n0 POSTHOOK: Input: default@t2_n0 -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: drop table t1_n0 PREHOOK: type: DROPTABLE PREHOOK: Input: default@t1_n0 @@ -6102,9 +6102,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) - Reducer 7 <- Map 3 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 5 <- Map 3 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -6160,7 +6162,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: fixob @@ -6223,7 +6225,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 7 + Reducer 5 + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int), KEY._col1 (type: int) @@ -6234,27 +6237,42 @@ STAGE PLANS: expressions: _col0 (type: int), true (type: boolean) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) + Reducer 6 Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) + Reducer 8 + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: boolean) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -6266,12 +6284,12 @@ PREHOOK: query: select * from fixOb where j NOT IN (select i from t7 where t7.j= PREHOOK: type: QUERY PREHOOK: Input: default@fixob PREHOOK: Input: default@t7 -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select * from fixOb where j NOT IN (select i from t7 where t7.j=fixOb.j) POSTHOOK: type: QUERY POSTHOOK: Input: default@fixob POSTHOOK: Input: default@t7 -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### -1 15 PREHOOK: query: drop table t7 PREHOOK: type: DROPTABLE @@ -6320,9 +6338,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) Reducer 3 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -6370,7 +6390,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: t_n0 @@ -6434,13 +6454,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 4 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int), KEY._col1 (type: int) @@ -6451,20 +6465,42 @@ STAGE PLANS: expressions: _col0 (type: int), true (type: boolean) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: boolean) + value expressions: _col1 (type: boolean) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) + Reducer 7 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -6475,11 +6511,11 @@ STAGE PLANS: PREHOOK: query: select t_n0.i from t_n0 where t_n0.j NOT IN (select t1_n0.i from t_n0 t1_n0 where t1_n0.j=t_n0.j) PREHOOK: type: QUERY PREHOOK: Input: default@t_n0 -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select t_n0.i from t_n0 where t_n0.j NOT IN (select t1_n0.i from t_n0 t1_n0 where t1_n0.j=t_n0.j) POSTHOOK: type: QUERY POSTHOOK: Input: default@t_n0 -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 7 1 4 @@ -6632,11 +6668,11 @@ STAGE PLANS: PREHOOK: query: select t_n0.i from t_n0 where t_n0.i NOT IN (select t1_n0.i from t_n0 t1_n0 where t1_n0.j=t_n0.j) PREHOOK: type: QUERY PREHOOK: Input: default@t_n0 -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select t_n0.i from t_n0 where t_n0.i NOT IN (select t1_n0.i from t_n0 t1_n0 where t1_n0.j=t_n0.j) POSTHOOK: type: QUERY POSTHOOK: Input: default@t_n0 -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 7 Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select t_n0.i from t_n0 where t_n0.j NOT IN (select t1_n0.i from t_n0 t1_n0 ) @@ -6786,11 +6822,11 @@ Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reduc PREHOOK: query: select t_n0.i from t_n0 where t_n0.j NOT IN (select t1_n0.i from t_n0 t1_n0 ) PREHOOK: type: QUERY PREHOOK: Input: default@t_n0 -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select t_n0.i from t_n0 where t_n0.j NOT IN (select t1_n0.i from t_n0 t1_n0 ) POSTHOOK: type: QUERY POSTHOOK: Input: default@t_n0 -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 1 4 Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product @@ -6934,11 +6970,11 @@ Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reduc PREHOOK: query: select t_n0.i from t_n0 where t_n0.i NOT IN (select t1_n0.i from t_n0 t1_n0 ) PREHOOK: type: QUERY PREHOOK: Input: default@t_n0 -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select t_n0.i from t_n0 where t_n0.i NOT IN (select t1_n0.i from t_n0 t1_n0 ) POSTHOOK: type: QUERY POSTHOOK: Input: default@t_n0 -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: drop table t1_n0 PREHOOK: type: DROPTABLE POSTHOOK: query: drop table t1_n0 @@ -7168,7 +7204,7 @@ where b.key not in ) PREHOOK: type: QUERY PREHOOK: Input: default@src -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select * from src b where b.key not in @@ -7178,7 +7214,7 @@ where b.key not in ) POSTHOOK: type: QUERY POSTHOOK: Input: default@src -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 0 val_0 0 val_0 0 val_0 diff --git a/ql/src/test/results/clientpositive/llap/tez_join.q.out b/ql/src/test/results/clientpositive/llap/tez_join.q.out index 53f8895078..475455dd57 100644 --- a/ql/src/test/results/clientpositive/llap/tez_join.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_join.q.out @@ -49,7 +49,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -71,7 +73,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: t2_n26 @@ -91,32 +93,47 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reducer 3 Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -134,7 +151,7 @@ where vt1_n42.id=vt2_n26.id PREHOOK: type: QUERY PREHOOK: Input: default@t1_n42 PREHOOK: Input: default@t2_n26 -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select vt1_n42.id from (select rt1_n42.id from (select t1_n42.id, t1_n42.od from t1_n42 order by t1_n42.id, t1_n42.od) rt1_n42) vt1_n42 @@ -145,4 +162,4 @@ where vt1_n42.id=vt2_n26.id POSTHOOK: type: QUERY POSTHOOK: Input: default@t1_n42 POSTHOOK: Input: default@t2_n26 -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ###